
	* PROJECT: GHANA SCHOOL FEEDING & LEARNING. 
	* This dofile constructs the child-level file for the main analyses. 
	* EA, 25 Sept 2020
	
	**** baseline ****
	* roster
	* Builds the person-level baseline roster (seca) with household-level covariates
	* (size, number of young/target-age children, head and mother characteristics)
	* and the locality / enumerator information from the household cover sheet (sec00).
	* Output: tempfile `roster', one row per roster record.

	use "$baseline/Household/seca.dta" , clear
			label var qid "Household id"
			label var u_id "Unique identifier: HH+ person"
			ren hha3 age
			ren hha4 hh_rel
			
	merge m:1 qid using "$baseline/Household/sec00.dta", keepusing (locid urbrur lang ethnicity enum) 
		* NOTE(review): unmatched cover-sheet households (_merge==2) are kept, as in the original
		drop _m 
		
			***gen hh size variable
			bys qid: gen hhsize=_N
			label var hhsize "Household size"
			
			* missing age compares as larger than any number, so it is never flagged as target age
			gen targetage = 0 if age !=.
			replace targetage =1 if age >=4 & age<16
			label var targetage "Target age 4-15 years"
			
			*** gen n siblings below 5
			bys qid: gen tag=1 if age<5
			bys qid: egen tag1=count(tag)
			ren tag1 n_under5
			label var n_under5 "Number of children under 5 years"
			drop tag
			
			* gen number of children target age
			bys qid: gen tag=1 if targetage==1
			bys qid: egen tag1=count(tag)
			ren tag1 n_target 
			label var n_target "Number of children of target age (4-15y)"
			drop tag
			
			* gen head id / head sex (hh_rel==1 is the household head)
			gen headid1=u_id if hh_rel==1
			* name the variable in full: the original "destring headid" only worked through
			* variable abbreviation (headid -> headid1)
			destring headid1, force replace
			bys qid: egen headid = min(headid1)
			* label the variable that is kept (headid1 is dropped below)
			label var headid "Head of the household id"

			gen headmale1=hha2 if hha2==1 & hh_rel==1
				replace headmale1=0 if hha2 ==2 & hh_rel==1
			bys qid: egen headmale =min(headmale1)
			label var headmale "Head of the household is male"
				
			* gen headage
			gen headage1 = age if hh_rel==1
			bys qid: egen headage= min(headage1)
			drop headage1 headmale1 headid1
			label var headage "Age of household head"			
			
			* gen polygamous household
			* The flag is labelled as a household characteristic, so it is spread to every
			* member of the household; the original set it to 1 only on the rows of the
			* members with hh_rel 3/4/5 themselves, leaving it 0 for all children.
			bys qid: egen polygamous = max(inlist(hh_rel, 3, 4, 5))
				label var polygamous "Household is polygamous"
			
			* gen mum: female spouse of a male head, or the female head herself
			gen mumid1=u_id if hh_rel==2 & headmale==1 & hha2==2
				replace mumid1=u_id if hh_rel ==1 & headmale ==0 & hha2==2 
			destring mumid1, force replace
			bys qid: egen mumid =min(mumid1)
			label var mumid "Mother id"
			
			gen mumage1 = age if mumid1!=.
			bys qid: egen mumage =min(mumage1)
			label var mumage "Mother's age"
			
			drop mumage1 mumid1 
			
			*** clean enumerator: harmonise spelling variants of the same name
			replace enum = "ADWOA ACHIAA SARPONG" if enum =="ADJOA ACHIAA SARPONG" | enum =="ADWOA ACHAA SARPONG" | ///
				enum =="ADWOA ACHIAA SARONG" | enum =="ADWOA ACHIAA SARPONE"
			
			replace enum = "ALOYSIUS ZIEM" if enum == "ALOYSUIS ZIEM"
			
			replace enum = "ATO OKYNE" if enum == "ATO KOYNE" | enum =="ATO OKINE"
			
			replace enum = "AUGUSTINE ABAKAH SAM"  if enum =="AUGUSTINE ABAKAH-SAM" | enum =="AUGUSTINE ABAKAN-SAM" | ///
				enum =="AUGUSTINE ABAKAU-SAM"
			
			replace enum = "JAMES ALLOTEY ANNAN" if enum =="JAMES ALLOTEY-ANNAN"
			replace enum = "YAW BARIMAH" if enum == "YAW BERIMAH" | enum =="YAW BHRIMAH"
			
			* one numeric code per distinct enumerator name. The original used _N, i.e. the
			* number of roster rows per enumerator, which is not an identifier (two enumerators
			* with the same number of rows would share a value).
			egen enum2 = group(enum)
			label var enum2 "Unique identifier for enumerator"
			
			keep qid pid u_id hha2 age hh_rel region district hhsize targetage n_target ///
			mumage mumid headid headmale headage polygamous n_under5 locid urbrur lang ethnicity enum2
		
		tempfile roster
		save `roster'
		
		***** ANTHRO ****
		* Cleans the baseline anthropometry module (secs) and saves it as `cleananthro'.
		* As used below: hhs2 / hhs3 / hhs4 are the day / month / year of birth (they are the
		* day, month and year arguments of mdy()), hhs5 is weight and hhs6 is height (they are
		* passed to zscore06 as w() and h() and labelled kg / cm further down), and hhs1 is
		* treated as age in years (the birth year is imputed as 2013 - hhs1).
	use "$baseline/Household/sec00.dta" , clear
			keep qid day month year locid
			merge 1:m qid using "$baseline/Household/secs.dta" 
			* drop cover-sheet households without any anthropometry record
			drop if _m==1
			drop _m
			
			*** basic data cleaning
			* 99 / 9999 are set to missing (presumably "don't know" codes - confirm with the questionnaire)
			recode hhs1 hhs2 hhs3 hhs4 (99=.) (9999=.)
			* birth years recorded as 20 or 5 are set to missing
			recode hhs4 (20=.) (5=.) 
			
			* clean year birth variable 
			* where the birth year is missing, impute it from the reported age (hhs1)
			gen year_prov = 2013 - hhs1 if hhs4==.
			replace hhs4 = year_prov if hhs4 ==. 
			*151 changes
				
			* mdy() returns missing when any of month / day / year is missing or invalid
			generate birthday=mdy(hhs3, hhs2, hhs4)
			format birthday %d

			generate dateint = mdy(month, day, year)
			format dateint %d
			* clean dateint var
			* children with a birthday but no interview date get the earliest interview date
			* observed in their locality (locid)
			sort qid pid
			gen tag=1 if birthday!=. & dateint==.
			bys locid: gen proxydate = dateint 
			format proxydate %d
			bys locid: egen proxy2=min(proxydate)
			format proxy2 %d
			
			replace dateint = proxy2 if tag==1
			
			drop tag proxy2 proxydate
			
			* age in days -> age in months (a month is taken as 365/12 days) -> completed months
			gen agedy= (dateint - birthday)
			gen agemonth = agedy/ (365/12)
			gen agechild=floor(agemonth)  
			
			label var agechild "Child age in months"
			
			* clean age in months variable - unfortunately there are dates of birth missing for some children
			* but we still have years so we can have a rough measure of how old they are for the anthro
			
			gen tag=1 if hhs4 !=. & birthday==.
			* I assign to all of these children the same date of birth, 30 June (if they have both day and month missing), so
			* they are in the middle of the year
			* missing info for 1500 kids (20%)
			* (if only one of day / month is missing, only that component is imputed)
			
			replace hhs3=6 if tag==1 & hhs3==.
			replace hhs2 =30 if tag==1 & hhs2==.
			
			replace birthday=mdy(hhs3, hhs2, hhs4) if tag==1
			replace agedy= (dateint - birthday) if tag==1
			replace agemonth = agedy/ (365/12) if tag==1
			replace agechild=floor(agemonth)  if tag==1
			
			drop tag agedy agemonth 
			* bring in sex (hha2) from the roster; only children found in the roster are kept
			merge 1:1 qid pid using `roster', keepusing(hha2)
			keep if _m==3
			drop _m
			
			*** clean agechild var
			recode agechild (-2=.)
			* diagnostic only: compare the age derived from dates with the reported age (hhs1)
			gen ageyear = agechild/12
			compare ageyear hhs1
			gen tag =1 if ageyear != hhs1

			* they seem quite comparable, the one generated from the age in months is obviously more accurate
			drop tag
			
			* clean height variable
			replace hhs6 =. if hhs6 >180 & hhs1 <10

			* heights below 50 for children with hhs1 > 3 are rescaled (x100; presumably recorded
			* in metres - confirm) and the rescaled value is used only if it looks plausible
			* NOTE(review): hhs1 > 3 is also true when hhs1 is missing
			gen tag=1 if hhs6 <50 & hhs1 >3
			gen height1= hhs6 *100 if hhs1 >3
			* remove values from height1 that do not seem plausible
			replace height1 =. if height1 >2000
			replace height1 = height1/10 if height1>=1000
			replace height1 =. if height1 >150 & hhs1 <6
			replace hhs6 = height1 if tag==1
			recode hhs6 (0=.) (715.3=.)
			replace hhs6=. if hhs6<25
			* clean weight variables
			recode hhs5 (0=.)
			replace hhs5 =. if hhs5 < 7 & hhs1 ==13
			replace hhs5 =. if hhs5 < 2 
			replace hhs5 =. if hhs5>80 & hhs1 <=3
			
			* sick: recoded from 1/2 to 1/0
			ren hhs7 sick
			recode sick (2=0)
			
		* NOTE(review): helper variables year_prov, ageyear and height1 are still in this file
		tempfile cleananthro
		save `cleananthro'
		
		**** z-scores for the youngest children (age below 60 months), computed with zscore06
			* rows with missing age in months are dropped too (missing compares as >= 60)
			drop if agechild >= 60
			zscore06 , a(agechild) s(hha2) h(hhs6) w(hhs5)
			
			* keep z-scores within [-5, 5] only; everything else (missing included) ends up missing
			foreach z in haz06 waz06 whz06 bmiz06 {
				replace `z' = . if !inrange(`z', -5, 5)
			}

			* shorter names; BMI-for-age is stored as baz
			foreach stub in haz waz whz {
				rename `stub'06 `stub'
			}
			rename bmiz06 baz
			
			* weight and height get readable names
			rename hhs5 chweight
			rename hhs6 chheight
			label variable chheight "Child height (cm)"
			label variable chweight "Child weight (kg)"
			
			keep qid pid sick agechild haz waz baz whz birthday chheight chweight 
		tempfile anthro06
		save `anthro06'
		
		**** now I compute the anthro for the older children (60 months and above) with zanthro
		* NOTE(review): this heading originally said the WHO reference standards are needed here,
		* but the calls below request the US growth charts (third argument of zanthro) - confirm
		* which reference is intended.
		use `cleananthro', clear
			keep if agechild >59
			
			*Growth Charts used: version US - because it's newest and I think it is the one commonly used
			* Note: Clement used the UK 1990 one
			**height_for_age
			egen haz = zanthro(hhs6, ha, US), xvar(agechild) gender(hha2) gencode(male=1, female=2) ageunit(month) 
			
			* bmi for age
			* height is converted from cm to m; BMI = weight / height^2
			gen hhs6m=hhs6/100
			order hhs6m, after (hhs6)
			gen bmi=(hhs5/(hhs6m*hhs6m))
			* BMI above 100 is set to missing (a missing BMI stays missing)
			replace bmi =. if bmi>100
			
			egen bcat = zbmicat(bmi), xvar(agechild) gender(hha2) gencode(male=1, female=2) ageunit(month)
			egen baz = zanthro(bmi, ba, US), xvar(agechild) gender(hha2) gencode(male=1, female=2) ageunit(month) 
			
			ren  hhs5 chweight
			ren  hhs6 chheight
			label var chheight "Child height (cm)"
			label var chweight "Child weight (kg)"
			
			keep qid pid haz baz bmi agechild sick birthday chheight chweight bcat sick
			
			* target-age flag based on the age computed from dates (agechild / 12)
			gen ageyear =agechild/12
			gen targetage1 =0 if ageyear!=.
				replace targetage1=1 if ageyear>=4 & ageyear <16			
			* only 1 child is 18 years old; it seems that in the end they did not interview children from 2 years old
			
		* stack the under-5 file (z-scores from zscore06) under the older children
		append using `anthro06'
		
		* person identifier as a string: household id + "00" + person id
		tostring qid, gen(qid2)
		tostring pid, gen(pid2)
		gen v="00"
		egen u_id = concat(qid2 v pid2) 
		* NOTE(review): the helper variable v is not dropped here and stays in the saved file
		drop qid2 pid2
		tempfile anthro
		save `anthro'
		
		* attach the anthropometry to the roster: every roster record is kept,
		* anthropometry records that are not in the roster are discarded
		use `roster'
		merge 1:1 qid pid using `anthro', keep(master match) nogenerate
				*keep if targetage ==1
			*1499 sibling data removed
		tempfile roster_ant
		save `roster_ant'
			
		************ COGS ********************************
		** Raw and aggregate scores for the literacy (hhu), maths (hhw), digit-span (hht) and Raven (hhv) items
		use "$baseline/Household/secu.dta" , clear
			* add the other three test modules; each merge indicator is kept as merge1-merge3
			local k = 0
			foreach module in secv secw sect {
				local ++k
				merge 1:1 u_id using "$baseline/Household/`module'.dta"
				rename _merge merge`k'
			}
		
		* one mark per item answered correctly (answer code 1); any other answer leaves the mark missing
			foreach item of varlist hhu1 - hhu15 hhw1 - hhw15 hht1 - hht12 hhv1 - hhv12 {
				gen `item'_rev = 1 if `item' == 1
			}
		
		* total scores
		* NOTE(review): rowtotal() counts missing marks as 0, so a child without any answers
		* in a module gets a score of 0 rather than missing
			egen lit    = rowtotal(hhu1_rev - hhu15_rev)
			egen maths  = rowtotal(hhw1_rev - hhw15_rev)
			egen digit  = rowtotal(hht1_rev - hht12_rev)
			egen raven  = rowtotal(hhv1_rev - hhv12_rev)
			* digit span split into forward (items 1-6) and backwards (items 7-12)
			egen fdigit = rowtotal(hht1_rev - hht6_rev)
			egen bdigit = rowtotal(hht7_rev - hht12_rev)

			* the item-level marks are no longer needed
			drop hhu1_rev - hhu15_rev
			drop hhw1_rev - hhw15_rev
			drop hht1_rev - hht12_rev
			drop hhv1_rev - hhv12_rev

			label variable raven "Raven raw score"
			label variable digit "Digit-span raw score"
			label variable lit "Literacy raw score"
			label variable maths "Maths raw score"
			label variable fdigit "Forward digit-span raw score"
			label variable bdigit "Backwards digit-span raw score"
			
			* aggregate indices: score on the first principal component
			pca lit maths
			predict aggre_lear
			label variable aggre_lear "PCA literacy and maths"
			
			pca digit raven
			predict aggre_cog
			label variable aggre_cog "Aggregate score cognition tests (digit, raven)"
			
			pca lit maths digit raven 
			predict aggre_edu
			label variable aggre_edu "Aggregate score lit, maths, digit, raven"
			
		keep pid qid u_id raven maths lit digit aggre* fdigit bdigit
	tempfile cogs
	save `cogs'
		
	* add the scores to the roster + anthro file; test records without a roster match are
	* discarded, and the merge indicator _merge stays in the saved file
	use `roster_ant'
			merge 1:1 u_id using `cogs', keep(master match)
	tempfile rosterupdate
	save `rosterupdate'		
		
	* education: note - section should be completed by everyone that was 5 or above in the hh
	* Builds the person-level education file (tempfile `edu') from secB: enrolment, grade,
	* school-feeding and cost variables plus school-level dummies.
	* Missing values compare as larger than any number in Stata, so every ">" / ">=" test
	* on grade / grade_att below is guarded with !missing(); without the guard a missing
	* grade was coded as secondary / not primary, and a missing grade_att as primary completed.

			use "$baseline/Household/secB.dta" , clear
			ren hhb1 ever_enr
			ren hhb2 grade_att
			ren hhb3 qual
			ren hhb5 enrol
			ren hhb6 age517
			ren hhb71 whynotsc
			
			* NOTE(review): private is 0 (not missing) when hhb16 is missing
			gen private =  hhb16 ==3 |  hhb16==4
			
			ren hhb17 grade
			ren hhb20 absent
			ren hhb24 repeat
			recode hhb33 (0=.) (2=0)
			
			ren hhb40 sf 
			recode sf (0=.) (2=0)
			**** 20% of children were already receiving school feeding at baseline 
			
			ren hhb41 typesf
			ren hhb43 eatless
			recode eatless (2=0)
			
			ren hhb44 bringhome 
			recode bringhome (0=.) (4=.) (5=.) (2=0)
			
			ren hhb45 brkfast
			* NOTE(review): nobrk is 0 (not missing) when brkfast is missing
			gen nobrk = brkfast ==0
			label var nobrk "Child does not eat breakfast"
			
			ren hhb46 foodscl
			
			recode hhb25 hhb26 hhb27 hhb30 (99=.) (999=.) (9998=.) (9999=.) (99999.99=.) (99999=.)
			egen fees = rowtotal (hhb25 hhb26 hhb27)
				* rowtotal() returns 0 when all components are missing: set those back to missing
				replace fees=. if hhb25==. & hhb26==. & hhb27==.
			label var fees "Sum of school, PTA and uniform fees"

			gen hour_min=hhb12a *60
			gen time_scl = hour_min + hhb12b
			label var time_scl "Time to school in minutes"
			
		
			* school-level dummies for currently enrolled children (grade codes 11-16 = primary)
			gen primary=0 if enrol==1 & grade > 16 & !missing(grade)
				replace primary=1 if enrol==1 & grade >=11 & grade <=16
				*80% primary
			
			gen grade1_5 = 0 if enrol==1 & grade > 15 & !missing(grade)
				replace grade1_5=1 if enrol==1 & grade >=11 & grade <=15
				*70% grade1 grade5
				
			gen secondary =0 if enrol==1 & grade <17 
				replace secondary=1 if enrol ==1 & grade >=17 & !missing(grade)
			*9% of age517 enrolled in secondary
			
			* primary completion among those not currently enrolled
			gen primary_compl =0 if enrol ==2  & grade_att <16
				replace primary_compl =1 if  grade_att >=16 & !missing(grade_att) & enrol==2
			*42% of non-enrolled have finished primary
			
			gen grade6=0 if enrol==1 & grade <16
				replace grade6=1 if enrol==1 & grade==16
			*8 of enrolled are in grade6
				
			keep qid u_id ever_enr grade_att qual enrol age517 whynotsc hhb8 hhb9 hhb10 hhb11 grade repeat ///
			hhb23 hhb25 hhb21 hhb26 hhb27 hhb28 hhb29 hhb30 hhb31 hhb32 hhb33 hhb35 hhb38 sf typesf hhb42 ///
			eatless bringhome brkfast foodscl hhb47 hhb48 hhb49 private nobrk absent hhb8 hhb9 fees time_scl primary grade1_5 secondary primary_compl grade6
			
			ren hhb30 cost_food 
			ren hhb33 inkind
			
		tempfile edu
		save `edu'
		
	**** MERGE ROSTER WITH EDUCATION OF TARGET CHILDREN
	* every roster record is kept; education records without a roster match are discarded
	
	use `rosterupdate' 
	* remove the indicator left over from the earlier merge with the test scores
	drop _merge
			merge 1:1 u_id using `edu', keep(master match) nogenerate
			* most of the children of target age are matched (only 31 over 6590 don't have data on education)
	tempfile roster_edu
	save `roster_edu'
	
	* Add education of mother 
	* primary 
	*** note: emefa: mslc and bece (basic education certificate exam) are primary (2kg+ 6 primary +3jhs) ; wassce & vocational & gce o level ssce gce a level (3 years more) : secondary
	* tech, prof dip,  hnd are all tertiary
	* The mother's highest grade (grade_att on her own row) is converted to years of
	* education and copied to every member of her household.
	use `roster' 
		merge 1:1 u_id using `edu'
		
			bys qid: egen mumid_new=min(mumid)
			destring u_id, gen(u_id1)
			* tag the mother's own row
			gen tag=1 if mumid_new == u_id1
			gen edu=grade_att if tag==1
			* a mother who never attended school has 0 years of education. This must be set on
			* the mother's row, before spreading it over the household: the original
			* "replace mum_grade=0 if ever_enr==2" used each person's own ever_enr, so it set
			* the mother's education to 0 for every never-enrolled child instead.
			replace edu=0 if tag==1 & ever_enr ==2
			bys qid: egen mum_grade =min(edu)
			* grade codes -> years of education (0 is not in the list and stays 0)
			recode mum_grade (1=0) (10=0) (11=1) (12=2) (13=3) (14=4) (15=5) (16=6) (17=7) (18=8) (19=9) (20=10) (21=11) (22=12) (23=13) (24=10) (25=11) (26=12) (27=13) ///
			(35=7) (36=8) (37=9) (38=10) (39=10) (40=11) (41=12) (42=12) (43=12) (44=13) (45=14) (46=14) (47=14) (48=14) (33=6) (34=6) (28=7) (29=8) (30=9) (31=10)
			
			label var mum_grade "Maternal years of education"
			
			label drop hhb2
			
		keep u_id qid mum_grade
		
	tempfile mumedu
	save `mumedu'
	
	use `roster_edu', clear
		merge 1:1 u_id using `mumedu'
			keep if _m==3
			drop _m
			order mum_grade, last
	
	tempfile roster2
	save `roster2'
	
	* Add education of head of the household
	* The head's highest grade (grade_att on the head's own row) is converted to years of
	* education and copied to every member of the household.
	use `roster' 
		merge 1:1 u_id using `edu'
		
			bys qid: egen headid_new=min(headid)
			destring u_id, gen(u_id1)
			* tag the head's own row
			gen tag=1 if headid_new == u_id1
			gen edu=grade_att if tag==1
			* a head who never attended school has 0 years of education. This must be set on
			* the head's row, before spreading it over the household: the original
			* "replace head_grade=0 if ever_enr==2" used each person's own ever_enr, so it set
			* the head's education to 0 for every never-enrolled household member instead.
			replace edu=0 if tag==1 & ever_enr ==2
			bys qid: egen head_grade =min(edu)
			* grade codes -> years of education (0 is not in the list and stays 0)
			recode head_grade (1=0) (10=0) (11=1) (12=2) (13=3) (14=4) (15=5) (16=6) (17=7) (18=8) (19=9) (20=10) (21=11) (22=12) (23=13) (24=10) (25=11) (26=12) (27=13) ///
			(35=7) (36=8) (37=9) (38=10) (39=10) (40=11) (41=12) (42=12) (43=12) (44=13) (45=14) (46=14) (47=14) (48=14) (33=6) (34=6) (28=7) (29=8) (30=9) (31=10)
			label var head_grade "Head of the household years of education"
			label drop hhb2
			
		keep u_id qid head_grade
		
	tempfile headedu
	save `headedu'
	
	use `roster2', clear
		merge 1:1 u_id using `headedu'
			keep if _m==3
			drop _m
			order head_grade, last
			
	tempfile roster_update2
	save `roster_update2'
	
	* construct asset quintile groupings for the households
	* step 1: agricultural assets (secr1), reshaped to one row per household with one
	* ownership variable per item
		
		use "$baseline/Household/secr1.dta" , clear
		
			drop hhr3 hhr4 hhr5 hhr1o region district
			rename hhr1 agric
			rename hhr2 does
			* number the items within household, in the order of the item variable
			bysort qid (agric): gen id = _n
			order qid id
			reshape wide agric does , i(qid) j(id)

			* give the 14 ownership variables the name of the item
			local items water_pump borehole tractor land_management_tools wheel_barrow donkey_cart saw ///
				spray_pump spray_hose micro_sprinklers animal_traction weighing_machine simple_tools other
			local j = 0
			foreach item of local items {
				local ++j
				rename does`j' `item'
			}

			* the item-code variables and the 15th item are not used
			drop agric1 agric2 agric3 agric4 agric5 agric6 agric7 agric8 agric9 agric10 agric11 agric12 agric13 agric14 agric15 - does15

			label variable water_pump "water pump"
			label variable borehole "borehole or well"
			label variable tractor "tractor (including trailor)"
			label variable land_management_tools "land management tools (harrow, tiller,"
			label variable wheel_barrow	"wheel barrow"
			label variable donkey_cart "donkey cart"
			label variable saw "saw and power saw"
			label variable spray_pump "spray pump (motor-blow)"
			label variable spray_hose "spray hose for tractors"
			label variable micro_sprinklers "micro sprinklers for irrigation"
			label variable animal_traction "animal traction plough"
			label variable weighing_machine "weighing machine for farming (scale)"
			label variable simple_tools "simple tools (hoes, cutlasses, knapsack"
			label variable other	"other"
			
		tempfile agriassets
		save `agriassets'
		
		* step 2: durable assets (secr2), reshaped to one row per household, then wealth
		* indices (first principal component) and their quintiles. Output: tempfile `pca'.
		use "$baseline/Household/secr2.dta" , clear
		
			drop region district hhr8 hhr9 hhr10
			* Number the items within household in file order. A stable sort is required:
			* "by qid, sort:" breaks ties at random whenever the file is not already sorted
			* by qid, so the same asset could end up under a different index (asset1, asset2,
			* ...) in different households and across runs.
			sort qid, stable
			by qid: gen id=_n
			rename hhr7 asset
			rename hhr6 hhr
			reshape wide hhr asset, i(qid) j(id)
			
			* labels assume the items are listed in this order in every household
			label var asset1 "Fridge"
			label var asset2 "Radio"
			label var asset3 "TV"
			label var asset4 "Iron (electric)"
			label var asset5 "Box iron"
			label var asset6 "Computer (desktop, laptop, etc"
			label var asset7 "Telephone (landline)"
			label var asset8 "Mobile phone"
			label var asset9 "Sewing machine"
			label var asset10 "Generator"
			label var asset11 "Washing machine"
			label var asset12 "Weighing machine for household"
			label var asset13 "Mattress"
			label var asset14 "Bicycle"
			label var asset15 "Motorcycle"
			label var asset16 "Car"
			label var asset17 "Truck (open truck)"
			label var asset18 "Boat"
			label var asset19 "Canoe"
			label var asset20 "Jewellery / ornaments"
			label var asset21 "Gas stove"

			drop hhr*
		merge 1:1 qid using `agriassets'
		drop _m
		
			* ownership recoded from 1/2 to 1/0; the range asset1 - other relies on the
			* variable order (durables first, then the agricultural items)
			foreach v of varlist  asset1 - other {
				recode `v' (2=0)
				}
			
			*** PCA (as in clement's, all assets bumped together)
			* predict with a single new variable returns the score on the first component
			pca asset1 - other, comp(2) vce(normal)
			predict x
			rename x wi_all
			egen quintile=cut(wi_all),group (5)
			label var wi_all "Wealth index, all assets"
			label variable quintile "Quintile group"
			
			*** PCA only durable assets
			pca asset1 - asset21, comp(2) vce(normal)
			predict x
			rename x wi_dur
			egen quint_dur=cut(wi_dur),group (5)
			label var wi_dur "Wealth index, durable assets"

			*** PCA only agric assets
			pca   water_pump - other, comp(2) vce(normal)
			predict x
			rename x wi_agr
			egen quint_agr=cut(wi_agr),group (5)
			label var wi_agr "Wealth index, agricultural assets"

			keep qid wi_all quintile wi_dur quint_dur wi_agr quint_agr
			
		tempfile pca
		save `pca'
	
	**** child-level dataset: add the household wealth indices to every roster record
	* only records whose household has a wealth index are kept
	use  `roster_update2'
			merge m:1 qid using `pca', keep(match) nogenerate
			
	tempfile almost
	save `almost'
	
	* add anaemia: note that I add anaemia from the second baseline, for 2 reasons:
	* 1. the first-baseline data were collected for the wrong sample of kids (oversample of controls and few hgsf); moreover, the
	* oversample was done in an area in which anaemia was really low, so it is not representative
	* 2. using the second baseline resolves the issue of seasonality: the first baseline was done in the rainy season, while the
	* second baseline was done in the same season as the endline
	
	* this was the code for linking to the first baseline as I had originally done:
	/*
	use "$baseline/Anaemia/anaemiaf_1.dta" , clear
			duplicates tag qid pid, gen (tag)
			drop if qid ==. & pid==.
			drop if tag==1  & sex==1
			
			ren age age_a
			ren sex sex_a
			
			keep qid pid sex age hb
	tempfile anaemia
	save `anaemia'
	*/
	
	* haemoglobin and malaria from the second baseline, one row per child
	use qid pid hb malaria using "$second/children_QIDfix.dta" , clear
	tempfile anaemia
	save `anaemia'
	
	* land: household totals over the records of the land module
	use "$baseline/Household/secg_land.dta", clear
		recode hhg1 (2=0)
		collapse (sum) land_use = hhg1 land_size = hhg3, by(qid )
		* any record with land -> 1
		replace land_use = 1 if land_use > 1
		label variable land_use "Owned or used land"
		label variable land_size "Land size"
		
	tempfile land
	save `land'
	
	* crop sales: the first record of each household is kept
	use "$baseline/Household/seci_marketing.dta", clear
		recode hhi1 (2=0)
		rename hhi1 sold
		
		keep qid sold
		duplicates drop qid, force
	tempfile sales
	save `sales'
	
	* livestock: household mean of the 1/0 indicator over the records of the module
	use "$baseline/Household/secn_livestock.dta", clear
		recode hhn1 (2=0)
		collapse (mean) hhn1 , by(qid )
		rename hhn1 livestock
	tempfile livestock
	save `livestock'	
		
	* Final wave-1 child file before labour is added: anaemia, household land / sales /
	* livestock, expenditure file, and the enrolment / attendance outcomes.
	use `almost'
		merge 1:1 qid pid using `anaemia'
		
			drop if _m==2
			
			* set the cutoff for anaemia according to the WHO standards
			* Hb < 115g/l for 6-11 years children,  Hb < 120g/l for 12-14 years children
			* Hb < 130g/l for males with age ≥ 15 years and Hb < 120g/l for non-pregnant females with age ≥ 15 years of age
			replace hb=100 if hb==10		
			* A child can only be classified when both hb and age are known. Missing age
			* compares as larger than any number, so without the age<. guards a child with
			* missing age was classified with the adult (15+) cutoffs.
			* NOTE(review): children younger than 6 with an hb value are always coded 0,
			* because no cutoff is applied to them - confirm this is intended.
			gen anaemia =0 if hb!=. & age!=.
				replace anaemia =1 if hb<115 & age >=6 & age <=11
				replace anaemia =1 if hb<120 & age >=12 & age <=14
				replace anaemia =1 if hb<130 & age >=15 & age<. & hha2 ==1
				replace anaemia =1 if hb<120 & age >=15 & age<. & hha2 ==2
				
			label var anaemia "Child is anaemic"
		
	/*	
	tempfile haemo
	save `haemo'
	
	use "$baseline/
	*/
	*** cleaning vars
	
	*ren hha2 male
		*recode male (2=0)
		*label var male "Male"
		ren hb haemo
	
	gen wave =1
	order u_id pid locid, bef(region)
	drop _m
	* NOTE(review): the household merges below keep using-only households (_merge==2) as
	* extra rows without a person id
	merge m:1 qid using `land'
	drop _m
	merge m:1 qid using `sales'
	drop _m
	merge m:1 qid using `livestock'
	drop _m
	* NOTE(review): merge m:m pairs rows by position within qid and is almost never what is
	* wanted; if the expenditure file has one row per household this should be m:1 - confirm
	merge m:m qid using "$baseline/30032015_pcexpmumedu.dta"
		
	**** GEN VARIABLES FOLLOWING ALDERMAN ET AL 2012 NORTHERN UGANDA
	
		recode enrol (2=0)
		*gen primary school dummy
		
		label var primary "Child is enrolled in primary school"
		
		gen age_kind=0 if age!=. 
				replace age_kind =1 if age ==4 | age ==5
				label var age_kind "Child is aged 4-5 years"
				
		* defined for 4-5-year-olds only: 1 if the grade code is 1 or 10, 0 otherwise
		* (a missing grade counts as 0). Same result as the original three statements, whose
		* "grade!=1 | grade!=10" condition was always true.
		gen enrol_k = inlist(grade, 1, 10) if age_kind==1
			label var enrol_k "Child aged 4-5 years is enrolled in kindergarten"
			
		* NOTE(review): this is a 0/1 indicator although the label says "Number of children";
		* it is 1 only from age 6 but 0 from age 4
		gen enrol1 = .
				replace enrol1 =1 if enrol==1 & age>=6 & age<=15 & primary==1 
				replace enrol1=0 if enrol==0 & age>=4 & age<=15
		label var enrol1 "Number of children 4-15y enrolled in primary"
		* this is a standard measure of net primary school enrolment 
		* note: almost all kids of target age are in primary, only 2% are in jss

	
		* share of the past 7 days absent, for enrolled children only
		gen abs_share = absent /7 if enrol1==1 | enrol_k==1
		label var abs_share "Share of school days in the past 7 days for enrolled 4-15-year-olds"
		
		gen net_att =1 - abs_share
		label var net_att "Share of school days attended conditional on enrolment 4-15-year-olds"
		
		* NOTE(review): male is 0 (not missing) when hha2 is missing
		gen male = hha2==1
		label var male "Male"
		* indicator left by the merge with the expenditure file
		drop _m
		
		gen pres_r1=1
		
	tempfile wave1_almost
	save `wave1_almost'
	
	* April 2019: add labour
	* Main activity of each person (secc) and the days per week spent on it, then merged
	* onto the wave-1 child file. Output: tempfile `wave1'.
	
	use "$baseline/Household/secc.dta", clear
	
		merge m:1 qid pid using `roster', keepusing (age hha2 )
		drop _merge
		
		* only the main activity (hhc1a, with its "other, specify" text hhc1o) is used
	
		gen family_act = inlist(hhc1a, 1, 6, 7) | ///
			(hhc1a==10 & inlist(hhc1o, "FISHING", "FISH MONGERING", "FISHING/FISH MONGER", "SELLS FISH"))
		label variable family_act "Family farm or business/enterprise"
		
		gen others_act = inlist(hhc1a, 2, 4, 5, 8)
		label variable others_act " Activity outside the household" 
		
		gen housework = (hhc1a == 3)
		label variable housework "Housework" 
		
		gen not_working = (hhc1a == 9) | (hhc1a==10 & inlist(hhc1o, "STILL IN SCHOOL", "STUDENT"))
		label variable not_working "Not working" 
		
		drop hhc1b hhc1c hhc1o hhc2b hhc2c hhc3b hhc3c hhc4b hhc4c hhc5b hhc5c hhc6b hhc6c hhc1a
		
		rename hhc2a months_act
		label variable months_act "Months spent in main activity"
		
		rename hhc3a weeks_act
		label variable weeks_act "Weeks per month spent in main activity"
	
		rename hhc4a days_act
		label variable days_act "Days per week spent in main activity"
		
		* weeks worked in the year, capped at 48, as a share of 48
		gen tot_weeks = months_act * weeks_act
		replace tot_weeks = 48 if tot_weeks > 48 & tot_weeks != .
		gen prop_weeks = tot_weeks / 48
		
		*** the outcome is days per week doing the activity. Since the number of months and of weeks
		* per month varies across children, days per week are scaled by the share of the year worked
		* (e.g. a child doing household chores 5 days a week, 12 months a year and 4 weeks per month
		* counts as working 5 days)
		* NOTE(review): the label below says "per year", but the variable is days per week
		* scaled by the share of weeks worked
		gen days_weighted = days_act * prop_weeks
			replace days_weighted = 0 if not_working == 1
		label variable days_weighted "Days per year spent in activity"
		
		* days per activity type: weighted days if that is the main activity, 0 otherwise
		gen day_hhwork    = cond(housework  == 1, days_weighted, 0)
		gen day_other_biz = cond(others_act == 1, days_weighted, 0)
		gen day_fam_biz   = cond(family_act == 1, days_weighted, 0)
		* any activity: missing when none of the activity flags applies
		gen day_work_tot  = days_weighted if housework == 1 | others_act == 1 | family_act == 1
			replace day_work_tot = 0 if not_working == 1
			
		label variable day_hhwork "Days per week doing housework during past 12 months"
		label variable day_other_biz "Days per week working outside the household during past 12 months"
		label variable day_fam_biz "Days per week working on the family farm /biz during past 12 months"
		label variable day_work_tot "Days per week working (any activity) during past 12 months"
		
		keep pid qid family_act others_act housework not_working day_hhwork day_other_biz day_fam_biz day_work_tot
		
	tempfile labour_r1
	save `labour_r1'
	
	* one row per child is forced before the 1:1 merge; the merge indicator stays in the file
	use `wave1_almost'
	duplicates drop qid pid, force
		merge 1:1 qid pid using `labour_r1'
		
	tempfile wave1
	save `wave1'	
		
		
	 *** ENDLINE **************************************************************************

	* roster
	* Builds the endline roster (Section_A) with the same household covariates as at
	* baseline. Output: tempfile `roster_w2'.
		use "$endline/Anonymous/Section_A.dta" , clear
			
			* household id: caseid without its first character
			tostring caseid, gen(caseid2)
			gen caseid3 = substr(caseid2, 2, .)
			destring caseid3, replace
			
			* person id: characters 7-10 of memberid, leading zeros removed
			tostring memberid, gen(mem2)
			gen mem3= substr(mem2, 7, 4)
			destring mem3, replace
			tostring mem3, gen(pid)
			tostring caseid3, gen(qid)
			
			* same construction as the baseline identifier: household id + "00" + person id
			gen u_id = qid+"00"+pid
			
			drop mem3 caseid3 
			
			gen wave=2
			ren hha3 age
			ren hha4 hh_rel
			
			destring qid, replace
			destring pid, replace
			destring u_id, replace
			***gen hh size variable
			bys qid: gen hhsize=_N
			label var hhsize "Household size"
		
		* note: target age has changed - now it is 4-21 so I can have a panel
		* Note: in the data collection the target age was 18 years max but since age is not such a definite concept in ghana
		* i allowed the targetage to go up to 21 years as otherwise i was dropping some of the original kids 
			gen targetage2 = 0 if age !=.
			replace targetage2 =1 if age >=4 & age<=21
			label var targetage2 "Target age second wave (4-21 years)"
			
			*** gen n siblings below 5
			bys qid: gen tag=1 if age<5
			bys qid: egen tag1=count(tag)
			ren tag1 n_under5
			label var n_under5 "Number of children under 5 years"
			drop tag
			
			* gen number of children target age
			bys qid: gen tag=1 if targetage2==1
			bys qid: egen tag1=count(tag)
			ren tag1 n_target2 
			* label now states the age range actually used by targetage2 (it said 5-18y)
			label var n_target2 "Number of children of target age 2 wave (4-21y)"
			drop tag
			
			* gen head id / head sex (hh_rel==1 is the household head)
			gen headid1=u_id if hh_rel==1
			* name the variable in full: the original "destring headid" only worked through
			* variable abbreviation (headid -> headid1)
			destring headid1, force replace
			bys qid: egen headid = min(headid1)
			* label the variable that is kept (headid1 is dropped below)
			label var headid "Head of the household id"

			gen headmale1=hha2 if hha2==1 & hh_rel==1
				replace headmale1=0 if hha2 ==2 & hh_rel==1
			bys qid: egen headmale =min(headmale1)
			label var headmale "Head of the household is male"
				
			* gen headage
			gen headage1 = age if hh_rel==1
			bys qid: egen headage= min(headage1)
			drop headage1 headmale1 headid1
			label var headage "Age of household head"			
			
			* gen polygamous household
			* The flag is labelled as a household characteristic, so it is spread to every
			* member of the household; the original set it to 1 only on the rows of the
			* members with hh_rel 3/4/5 themselves, leaving it 0 for all children.
			bys qid: egen polygamous = max(inlist(hh_rel, 3, 4, 5))
				label var polygamous "Household is polygamous"
				
			* gen mum
			* note: assumptions this is the mum for the polygamous hh and for the hh in which kids other than the offspring are living 
			
			gen mumid1=u_id if hh_rel==2 & headmale==1 & hha2==2
				replace mumid1=u_id if hh_rel ==1 & headmale ==0 & hha2==2 
			destring mumid1, force replace
			bys qid: egen mumid =min(mumid1)
			label var mumid "Mother id"
			
			gen mumage1 = age if mumid1!=.
			bys qid: egen mumage =min(mumage1)
			label var mumage "Mother's age"
			
			drop mumage1 mumid1
			ren Arm arm
			
			* targetage2 / n_target2 are written in full: the original list named targetage and
			* n_target, which do not exist here and only resolved through variable abbreviation
			* NOTE(review): pid is not in the list, so it is not part of the saved roster
			keep caseid qid memberid u_id wave arm u_id hha2 age hh_rel region district hhsize targetage2 n_target2 mumage mumid headid headmale headage polygamous n_under5 locid iwerid
		tempfile roster_w2
		save `roster_w2'
	
		******** ANTHRO ****
		* Cleans the endline anthropometry (Section_R): age in months from the date of birth
		* (endline, else baseline, else imputed from the reported age), then height (hhr7) and
		* weight (hhr6) fixes. Output: tempfile `cleananthro_w2'.
		use "$endline/Anonymous/Section_R.dta" , clear
			* members that appear more than once are removed altogether (all copies)
			duplicates tag memberid, gen(tag)
			drop if tag
			drop tag
			merge 1:1 memberid using `roster_w2' , keepusing(hha2 age qid u_id)
			keep if _m==3
			drop _m

			ren ciid pid
			**** it seems all dates of birth are missing for girls - i will need to get them from the first round
			merge 1:1 qid pid using `anthro', keepusing(birthday agechild)
			
			drop if _m ==2
			drop _m
		
			* 99 / 9999 are set to missing BEFORE the birthday is built (as in wave 1).
			* The original recoded only after mdy(), so a year of 9999 with a valid day and
			* month produced a date in year 9999 and the child was treated as having a birthday.
			recode hhr3 hhr4 hhr5 (99=.) (9999=.)

			generate birthday2=mdy(hhr4, hhr3, hhr5)
			format birthday2 %d
			* fall back on the baseline birthday
			replace birthday2 = birthday if birthday2 ==.
		
			gen dummy_bday=0 if birthday2 ==.
				replace dummy_bday =1 if birthday2 !=.
			label var dummy_bday "Child has birthday data to compute anthro by months"
			
			*** since day of the interview is missing, i will assume a day in between the fieldwork 
			gen month=2
			gen day=24
			gen year=2016
			
			generate dateint = mdy(month, day, year)
			format dateint %d
	
			gen agedy= (dateint - birthday2)
			gen agemonth = agedy/ (365/12)
			gen agechild2=floor(agemonth) 
			
			label var agechild2 "Child age in months - wave 2"
	  
			* understand if everything is ok with the new agechild var
			* diff = months elapsed between the two waves according to the computed ages
			gen diff= agechild2 - agechild
			
			gen agenew=agechild2/12
			gen agenew2=hhr2*12
			gen diff2=agechild2-agenew2 if diff>36 & agechild!=.
			* the interactive inspection is disabled: "edit" opens the Data Editor and must
			* not run inside a do-file; run the line below by hand to look at the cases
			* edit agechild2 agechild hhr2 diff diff2 agenew2 if diff>36 & diff!=. & diff2>10
			gen diff3=agenew2-agechild
			* where the computed age grew by more than 36 months, use the reported age (hhr2,
			* in months) if that one is consistent with the baseline age
			replace agechild2 =agenew2 if diff>36 & diff!=. & diff3<35			
			
			
			*** as in wave 1, i will replace day of birth for kids 30 june as date of birth
			gen year2= year-age if dummy_bday==0
				replace hhr5 =year2 if hhr5==. & dummy_bday==0
			
			replace hhr3 =30 if hhr3==. & dummy_bday==0
			replace hhr4 =6 if hhr4==. & dummy_bday==0
			
			replace birthday2=mdy(hhr4, hhr3, hhr5) if dummy_bday==0
				
			replace agedy= (dateint - birthday2) if dummy_bday==0
			replace agemonth = agedy/ (365/12) if dummy_bday==0
			replace agechild2=floor(agemonth)  if dummy_bday==0
			
			* negative ages: retry with the baseline birthday, drop what is still negative
			gen tag =1 if agechild2 <0
			replace agedy =(dateint - birthday) if tag ==1 & birthday!=.
			replace agemonth = agedy/ (365/12) if tag==1
			replace agechild2=floor(agemonth)  if tag==1
			drop if agechild2 <0			
			
		* clean height variable
			replace hhr7 =hhr7*10 if  hhr7<30
			replace hhr7 =. if hhr7 >200
			
			* NOTE(review): the last statement makes the first two redundant - every hhr6
			* equal to 1 becomes 100 (kept as in the original)
			replace hhr6 =hhr6*100 if hhr6==1 & agechild2==174 & hha2==1 & age==14
			replace hhr6 =hhr6*100 if hhr6==1  & agechild2==139 & hha2==2 
			replace hhr6 =hhr6*100 if hhr6==1  
			* drop the baseline age and the diagnostic variables. The diagnostic names were
			* originally appended to the tempfile statement below, which only declared unused
			* temporary file names and left the variables in the data.
			drop agechild diff agenew agenew2 diff2 diff3
			
	tempfile cleananthro_w2
	save `cleananthro_w2'
		
	**** z-scores for the youngest children (age below 60 months), computed with zscore06
			* rows with missing age in months are dropped too (missing compares as >= 60)
			drop if agechild2 >= 60
			zscore06 , a(agechild2) s(hha2) h(hhr7) w(hhr6)
			
			* keep z-scores within [-5, 5] only; everything else (missing included) ends up missing
			foreach z in haz06 waz06 whz06 bmiz06 {
				replace `z' = . if !inrange(`z', -5, 5)
			}

			* shorter names; BMI-for-age is stored as baz
			foreach stub in haz waz whz {
				rename `stub'06 `stub'
			}
			rename bmiz06 baz
			
			* the age variable is written out in full (agechild2); birthday is the baseline one
			keep qid pid hhr8 agechild2 haz waz baz whz birthday  hhr7 hhr6
		tempfile anthro06
		save `anthro06'
		
		**** anthropometric z-scores for children aged 60 months or more
		* NOTE: the zanthro calls below request the US growth charts (chart codes "ha, US" and "ba, US"), not a WHO reference
		use `cleananthro_w2', clear
			* observations with missing agechild2 also satisfy this condition and stay in this file
			keep if agechild2 > 59
			
			** height-for-age
			egen haz = zanthro(hhr7, ha, US), xvar(agechild2) gender(hha2) gencode(male=1, female=2) ageunit(month) 
			
			** BMI-for-age: height is converted from cm to m, BMI = weight / height^2
			gen hhs6m = hhr7/100
			order hhs6m, after (hhr7)
			gen bmi = (hhr6/(hhs6m*hhs6m))
			* BMI values above 50 are treated as implausible
			replace bmi = . if bmi > 50
			 
			* agechild was dropped before the file was saved; the age variable is named in full (agechild2)
			* rather than through abbreviation, consistent with the zanthro calls
			egen bcat = zbmicat(bmi), xvar(agechild2) gender(hha2) gencode(male=1, female=2) ageunit(month)
			egen baz = zanthro(bmi, ba, US), xvar(agechild2) gender(hha2) gencode(male=1, female=2) ageunit(month) 

			keep qid pid haz baz bmi agechild2 hhr8 birthday hhr6 hhr7 bcat
			
			* age in years and target-age flag (4 to under 16 years)
			gen ageyear = agechild2/12
			gen targetage1 = 0 if ageyear != .
				replace targetage1 = 1 if ageyear >= 4 & ageyear < 16			
			
			* add back the under-five file
			append using `anthro06'
		
			* person identifier = household id, then "00", then person id (as text, converted to numeric below)
			tostring qid, gen(qid2)
			tostring pid, gen(pid2)
			gen u_id = qid2 + "00" + pid2
			drop qid2 pid2
			
			* final names for age in months, weight and height
			rename (agechild2 hhr6 hhr7) (agechild chweight chheight)
			label var chweight "Child weight (kg)"
			label var chheight "Child height (cm)"
			
			destring u_id, replace
			tempfile anthro
			save `anthro'
			
		* attach anthropometry to the wave-2 roster; records found only in the anthropometry file are discarded
		use `roster_w2'
		merge 1:1 u_id using `anthro'
				drop if _merge == 2
				drop _merge
				*keep if targetage2 ==1 
			
		tempfile roster_ant_w2
		save `roster_ant_w2'
			
			************ COGNITIVE AND LEARNING TESTS ************
		** Raw and aggregate scores for literacy, maths, digit span and Raven's matrices
		
	use "$endline/Anonymous/Section_S.dta" , clear
			* attach sections T, U, V and W in turn; each merge indicator is kept as merge1-merge4
			local k = 0
			foreach sec in T U V W {
				local ++k
				merge 1:1 caseid memberid using "$endline/Anonymous/Section_`sec'.dta"
				rename _merge merge`k'
			}
	
	* Item indicators: <item>_rev is 1 when the raw item equals 1 and missing otherwise.
	* Literacy = section U (hhu1-hhu15), digit span = section T (hht1-hht12),
	* Raven = section V (hhv1-hhv12), maths = section W (hhw1-hhw15)
	foreach var of varlist hhu1 - hhu15 hht1 - hht12 hhv1 - hhv12 hhw1 - hhw15 {
			gen `var'_rev = `var' if `var' == 1
			}
				
			* Raw totals per test
			* NOTE(review): rowtotal() returns 0 when every item is missing, so a child without any
			* recorded item for a test receives a score of 0 rather than missing - confirm this is wanted
			egen lit = rowtotal(hhu1_rev - hhu15_rev)
			egen maths = rowtotal(hhw1_rev - hhw15_rev)
			egen digit = rowtotal(hht1_rev - hht12_rev)
			egen raven = rowtotal(hhv1_rev - hhv12_rev)
			* forward (items 1-6) and backward (items 7-12) digit span
			egen fdigit = rowtotal(hht1_rev - hht6_rev)
			egen bdigit = rowtotal(hht7_rev - hht12_rev)
			
			* remove every helper indicator (all 15 maths items included, so none is left behind)
			drop hhu1_rev - hhu15_rev
			drop hhw1_rev - hhw15_rev
			drop hht1_rev - hht12_rev
			drop hhv1_rev - hhv12_rev
			
			label var raven "Raven raw score"
			label var digit "Digit-span raw score"
			label var lit "Literacy raw score"
			label var maths "Maths raw score"
			label var fdigit "Forward digit-span raw score"
			label var bdigit "Backwards digit-span raw score"
			
			* every record of a memberid that occurs more than once is removed
			duplicates tag memberid , gen(tag)
			drop if tag!=0
				
		* Aggregate scores: first principal component of each set of raw scores
		pca lit maths
			predict aggre_lear
			label var aggre_lear "PCA literacy and maths"
			
			pca digit raven
			predict aggre_cog
			label var aggre_cog "Aggregate score cognition tests (digit, raven)"
						
			pca lit maths digit raven 
			predict aggre_edu
			label var aggre_edu "Aggregate score lit, maths, digit, raven"
	
		keep caseid memberid raven maths lit digit aggre_* fdigit bdigit 
		
	tempfile cogs_w2
	save `cogs_w2'
					
	* attach test scores to the roster; records present only in the test file are discarded
	use `roster_ant_w2'
	merge 1:1 memberid using `cogs_w2'
	drop if _merge == 2
	drop _merge
	
	tempfile rosterupdate_w2
	save `rosterupdate_w2'		
			
	use "$endline/Anonymous/Section_B.dta" , clear
	
		* Education module (Section B): readable names for the questionnaire items
		rename (hhb1 hhb2 hhb3 hhb5 hhb6 hhb71) (ever_enr grade_att qual enrol age517 whynotsc)
		* private-school flag from codes 3 and 4 of hhb16 (a missing hhb16 gives 0, not missing)
		gen private = inlist(hhb16, 3, 4)
		rename (hhb17 hhb20 hhb24) (grade absent repeat)
		recode hhb33 (0=.) (2=0)	
		rename (hhb40 hhb41 hhb43 hhb44 hhb45) (sf typesf eatless bringhome brkfast)
		recode sf (0=.) (2=0)	
		recode eatless (2=0)
		recode bringhome (0=.) (4=.) (5=.) (2=0)
			
		* no-breakfast flag (a missing brkfast gives 0, not missing)
		gen nobrk = (brkfast == 0)
		label var nobrk "Child does not eat breakfast"
		rename hhb46 foodscl	
		 
		* blank the listed codes in the fee and food-cost items, then add up the three fee items
		recode hhb25 hhb26 hhb27 hhb30 (99=.) (999=.) (9998=.) (9999=.) (99999.99=.) (99999=.) (9990=.)
		egen fees = rowtotal(hhb25 hhb26 hhb27)
				* rowtotal() gives 0 when all three items are missing; restore missing in that case
				replace fees = . if hhb25 == . & hhb26 == . & hhb27 == .
			label var fees "Sum of school, PTA and uniform fees"
			
		* travel time to school: hours (hhb12a) converted to minutes plus minutes (hhb12b)
		gen hour_min = hhb12a*60
		gen time_scl = hour_min + hhb12b
		label var time_scl "Time to school in minutes"
	
		keep memberid caseid ever_enr grade_att qual enrol age517 whynotsc hhb8 hhb9  grade repeat ///
			hhb23 hhb25 hhb21 hhb26 hhb27 hhb28 hhb29 hhb30 hhb31 hhb32 hhb33 hhb35 hhb38 sf typesf hhb42 ///
			eatless bringhome brkfast foodscl hhb47 hhb48 hhb49 private nobrk absent fees  time_scl
			
		rename (hhb30 hhb33) (cost_food inkind)
			
		tempfile edu_w2
		save `edu_w2'	
			
		**** Roster plus education module; records present only in the education file are discarded
	
	use `rosterupdate_w2' 
		merge 1:1 memberid using `edu_w2'
		keep if _merge != 2
	drop _merge
	tempfile roster_edu_w2
	save `roster_edu_w2'		
			
	* Add education of mother: the mother's highest grade is read from her own record
	* and copied to every member of her household
	use `roster_w2', clear
		merge 1:1 memberid using `edu_w2'
		* records present only in the education file carry no roster identifiers
		drop if _merge == 2
		drop _merge

			bys qid: egen mumid_new = min(mumid)
			* tag marks the mother's own record
			gen tag = 1 if mumid_new == u_id
			gen edu = grade_att if tag == 1
			bys qid: egen mum_grade = min(edu)
			
			* A mother who never enrolled (ever_enr == 2 on HER record) has zero years of education.
			* The flag is read from the mother's record and spread to the household; testing
			* ever_enr on each row would use the child's own enrolment instead of the mother's.
			gen mum_never1 = (ever_enr == 2) if tag == 1
			bys qid: egen mum_never = max(mum_never1)
			replace mum_grade = 0 if mum_never == 1
			
			* convert grade codes into years of education
			recode mum_grade (1=0) (10=0) (11=1) (12=2) (13=3) (14=4) (15=5) (16=6) (17=7) (18=8) (19=9) (20=10) (21=11) (22=12) (23=13) (24=10) (25=11) (26=12) (27=13) ///
			(35=7) (36=8) (37=9) (38=10) (39=10) (40=11) (41=12) (42=12) (43=12) (44=13) (45=14) (46=14) (47=14) (48=14) (33=6) (34=6) (28=7) (29=8) (30=9) (31=10)
			
			label var mum_grade "Maternal years of education"
			
		keep memberid u_id qid mum_grade
		
	tempfile mumedu_w2
	save `mumedu_w2'		
			
	* attach maternal education to the roster/education file (matched records only)
	use `roster_edu_w2', clear
		merge 1:1 u_id using `mumedu_w2'
			keep if _merge == 3
			drop _merge
			order mum_grade, last		
	
	tempfile roster2_w2
	save `roster2_w2'		
			
	* Add education of head of the household: the head's highest grade is read from the
	* head's own record and copied to every member of the household
	use `roster_w2', clear
		merge 1:1 memberid using `edu_w2'
		* records present only in the education file carry no roster identifiers
		drop if _merge == 2
		drop _merge
		
			bys qid: egen headid_new = min(headid)
			* tag marks the head's own record
			gen tag = 1 if headid_new == u_id
			gen edu = grade_att if tag == 1
			bys qid: egen head_grade = min(edu)
			
			* A head who never enrolled (ever_enr == 2 on the HEAD's record) has zero years of education.
			* The flag is read from the head's record and spread to the household; testing
			* ever_enr on each row would use that member's own enrolment instead of the head's.
			gen head_never1 = (ever_enr == 2) if tag == 1
			bys qid: egen head_never = max(head_never1)
			replace head_grade = 0 if head_never == 1
			
			* convert grade codes into years of education
			recode head_grade (1=0) (10=0) (11=1) (12=2) (13=3) (14=4) (15=5) (16=6) (17=7) (18=8) (19=9) (20=10) (21=11) (22=12) (23=13) (24=10) (25=11) (26=12) (27=13) ///
			(35=7) (36=8) (37=9) (38=10) (39=10) (40=11) (41=12) (42=12) (43=12) (44=13) (45=14) (46=14) (47=14) (48=14) (33=6) (34=6) (28=7) (29=8) (30=9) (31=10)
			label var head_grade "Head of the household years of education"
			
		keep memberid u_id qid head_grade
		
	tempfile headedu_w2
	save `headedu_w2'
	
	* attach the head's education (matched records only)
	use `roster2_w2', clear
		merge 1:1 u_id using `headedu_w2'
			keep if _merge == 3
			drop _merge
			order head_grade, last
			
	tempfile roster_update2_w2
	save `roster_update2_w2'
	
	**** Anaemia and malaria: prepare the medical file (records with wave==1 only)
	use "$endline/medical data/IE Medical Impact Base_End_Line_Data file 3 15thNov2016.dta" , clear
	keep if wave == 1
			
			* household id = caseid without its first character (leading zeros are lost in the destring/tostring round trip)
			tostring caseid, gen(caseid2)
			gen caseid3 = substr(caseid2, 2, .)
			destring caseid3, replace
			tostring caseid3, gen(qid)	
			* person identifier = household id, "00", person id
			tostring pid, replace
			gen u_id = qid + "00" + pid
			
	* copies of the measurements held in this file, kept for cross-checking against the household data
			gen sex_antro = sex
			gen height_anthro = height
			gen weight_anthro = weight2
			gen arm_anthro = arm
			gen age_anthro = age
			gen school_anthro = institution
			destring u_id, replace
			
	keep caseid u_id hb  sex_antro height_anthro weight_anthro arm_anthro age_anthro school_anthro malaria
	* marks records that come from the medical file
	gen dummy_anaemia = 1
	tempfile anaemia_w2
	save `anaemia_w2'
	
	use `roster_update2_w2', clear
			merge 1:1 u_id using `anaemia_w2'
			**** 846 matched
			drop if _merge == 2
			
			**** there are quite a lot of differences between the ages declared in the anaemia data and the ones i have (both roster and the one calculated from birthdays)
			* i will keep the one from the household roster
			
			* sex, height, weight, arm: no difference
			
			drop _merge sex_antro height_anthro weight_anthro arm_anthro age_anthro school_anthro
			
			* clean hb: values below 100 are multiplied by 10 so that all readings share one scale
			replace hb = hb*10 if hb < 100
			
			* anaemia flag with age- and sex-specific haemoglobin cut-offs
			* (children below 6 with a haemoglobin reading stay at 0, as before)
			gen anaemia = 0 if hb != .	
					replace anaemia = 1 if hb < 115 & inrange(age, 6, 11)
					replace anaemia = 1 if hb < 120 & inrange(age, 12, 14)
					* Stata treats a missing age as larger than any number, so it must be excluded
					* explicitly or records with unknown age would be classified with the adult cut-offs
					replace anaemia = 1 if hb < 130 & age >= 15 & !missing(age) & hha2 == 1
					replace anaemia = 1 if hb < 120 & age >= 15 & !missing(age) & hha2 == 2
		
			label var anaemia "Child is anaemic"
			ren hb haemo
			
	tempfile roster_anaemia_w2
	save `roster_anaemia_w2'
	
	*** Dietary diversity module (collected at endline only)
	
	use "$endline/Anonymous/Section_AB.dta" , clear
		
			* codes 77 and 88 are set to missing in all meal and food-group items
			foreach v of varlist  hhab1a - hhb3o  hhb3r hhb3p hhb3q hhb3nn {
				recode `v' (77=.) (88=.)
			}
		
			* number of meals: sum of the seven hhab1 items
			egen num_meal = rowtotal(hhab1a hhab1b hhab1c hhab1d hhab1e hhab1f hhab1g)
			ren hhab2 veggie
			
			* separate copies of green and other vegetables, taken before hhb3d is renamed
			gen greenveg = hhb3d	
			gen veg_other = hhb3e
			
			rename (hhb3a hhb3b hhb3c hhb3d) (cereals vita roots vegetables)
			rename (hhb3g hhb3h hhb3j hhb3k hhb3l hhb3m hhb3n hhb3o) (fruit_other meat eggs fish legumes milk oil sugar)
				* combined items: hhb3f also counts for vita, hhb3e for vegetables, hhb3i for meat
				replace vita = 1 if hhb3f == 1
				replace vegetables = 1 if hhb3e == 1
				replace meat = 1 if hhb3i == 1
			
			gen group1 = cereals
			replace group1 = 1 if roots == 1 
			label var group1 "Grain, roots or tubers"
			
			* other fruit OR other vegetables OR green vegetables.
			* The indicator is switched on when any of the three is 1; an unconditional
			* replace would overwrite it and leave only the green-vegetable item.
			gen group2_8fg = fruit_other
				replace group2_8fg = 1 if veg_other == 1
				replace group2_8fg = 1 if greenveg == 1
			label var group2_8fg "Other fruits and vegetables (incl. green)"
			
			gen group3 = meat
				replace group3 = 1 if fish == 1
			label var group3 "Meat and fish"
			
			gen group4 = eggs
			label var group4 "Eggs"	
			
			gen group5 = legumes
			label var group5 "Legumes"
			
			gen group6 = milk
			label var group6 "Dairy"
			
			gen group7 = oil 
			label var group7 "Oil"
			
			gen group8 = vita
			label var group8 "Consumed Vitamin-A rich vegetables and fruit"
			
			gen group2_9fg = fruit_other
			label var group2_9fg "Other fruits"
			
			gen group9 = greenveg
			label var group9  "Consumed green leafy vegetables"
			
			gen group10 = veg_other
			label var group10 "Other vegetables"
			
			* nine-group score; set back to missing when all the listed food items are missing
			egen dd9 = rowtotal(group1 group2_9fg group3 group4 group5 group6 group9 group8 group10)
				replace dd9 = . if cereals == . & vegetables == . & roots == . & legumes == . & meat == . & fish == . & milk == . & eggs == . & oil == .
			label var dd9 "Dietary diversity score (9 food groups)"
	
			gen dummy_ani = 0 if group3 != . | group4 != . | group6 != . 
				replace dummy_ani = 1 if group3 == 1 | group4 == 1 | group6 == 1
			label var dummy_ani "Consumed animal-sourced protein"
			
			* minimum dietary diversity, based on dd9 (named in full rather than abbreviated)
			gen min_dd = 0 if dd9 != .
				replace min_dd = 1 if dd9 > 4 & dd9 != .
			label var min_dd "Min DD: consumed more than 4 food groups"
			
			* any fruit or vegetable; NOTE: fru_veg is not in the keep list below, so it is not saved
			gen fru_veg = 0 if group2_8fg != . & group8 != . & group2_9fg != .
			replace fru_veg = 1 if (group2_8fg != 0 & group2_8fg != .) | (group8 != 0 & group8 != .) | (group2_9fg != 0 & group2_9fg != .)
		
			keep caseid memberid num_meal dd9 group* hhab1d veggie hhb3nn hhb3q hhb3p hhb3r dummy_ani min_dd
			
			ren hhb3nn proc_meat
			ren hhb3q fried
			ren hhb3r cake
			ren hhb3p fizzy
			duplicates drop memberid, force
		tempfile diet_w2
		save `diet_w2'
	* Time use (Section AC)
		use "$endline/Anonymous/Section_AC.dta" , clear	
			
			* household id: caseid without its first character
			tostring caseid, gen(caseid2)
			gen caseid3 = substr(caseid2, 2, .)
			destring caseid3, replace
			
			* person id: four characters of memberid starting at position 7
			tostring memberid, gen(mem2)
			gen mem3 = substr(mem2, 7, 4)
			destring mem3, replace
			tostring mem3, gen(pid)
			tostring caseid3, gen(qid)
			
			* person identifier = household id, "00", person id
			gen u_id = qid + "00" + pid
			
			drop mem3 caseid3 
			
			destring qid pid u_id, replace
			
			rename (hhac1 hhac2 hhac3 hhac4 hhac5 hhac6 hhac7) (care domest farm_work out_work sch_time stu_time hrs_lei)
		
			* activity totals (rowtotal() treats a missing component as 0)
			egen hrs_care = rowtotal(care domest)
			egen hrs_work = rowtotal(farm_work out_work)
			egen hrs_stu = rowtotal(sch_time stu_time)
			label var hrs_care "Hours spent in household chores and care"
			label var hrs_work "Hours spent at work in the household farm or for pay"
			label var hrs_stu "Hours spent in school or study"
			label var hrs_lei "Hours spent in leisure activities"
			
		tempfile hrs_w2
		save `hrs_w2'	
		
		* food programme file: hhad1 recoded from 1/2 to 1/0, one record per memberid
		use "$endline/Anonymous/FOOD_PROGRAME.dta"	, clear
			recode hhad1 (2=0)
			duplicates drop memberid, force
		tempfile foodpro
		save `foodpro'	
		
		* sickness and puberty items from the cleaned anthropometry file
		use `cleananthro_w2', clear
			keep u_id hhr8 hhr9 hhr10 hhr11 hhr12
			rename (hhr8 hhr9 hhr10 hhr11) (sick menarche age_menarche telarche)
			* codes 77 and 99 are set to missing
			recode menarche age_menarche telarche (77=.) (99=.)
			* reported ages at menarche below 10 are discarded
			replace age_menarche = . if age_menarche < 10
			* a reported age at menarche implies that menarche has occurred
			replace menarche = 1 if menarche == 0 & age_menarche != .
			
		tempfile menarche
		save `menarche'
		
	**** Household assets and wealth indices
	
	use "$endline/Anonymous/assets16_1.dta", clear
		merge 1:1 caseid using "$endline/Anonymous/assets16_2.dta"
		
		* ownership items recoded from 1/2 to 1/0
		recode asset1 - asset21 water_pump-other (2=0)
		
		*** durable assets only: index = score on the first principal component
		pca asset1 - asset21, comp(2) vce(normal)
			predict wi_dur
			egen quint_dur = cut(wi_dur), group(5)
			label var wi_dur "Wealth index, durable assets"
			
		*** agricultural assets only
			pca water_pump - other, comp(2) vce(normal)
			predict wi_agr
			egen quint_agr = cut(wi_agr), group(5)
			label var wi_agr "Wealth index, agricultural assets"
			
		*** all assets pooled (as in clement's)
			pca asset1 - other, comp(2) vce(normal)
			predict wi_all
			egen quintile = cut(wi_all), group(5)
			label var wi_all "Wealth index, all assets"
			label variable quintile "Quintile group"
	
		keep caseid wi_all wi_dur wi_agr quint_dur quint_agr quintile
	tempfile assets_w2
	save `assets_w2'
	
	* land: household totals of hhg1 and hhg3
	use "$endline/Anonymous/Section_G.dta", clear
		collapse (sum) land_use = hhg1 land_size = hhg3, by(caseid)
		label var land_use "Owned or used land"
		label var land_size "Land size"
		
	tempfile land_w2
	save `land_w2'
		
	* sales: hhi1 recoded to 1/0; the first record per household is kept
	use "$endline/Anonymous/Section_I.dta", clear
		keep caseid hhi1
		recode hhi1 (2=0)
		rename hhi1 sold
		duplicates drop caseid, force
	
	tempfile sales_w2
	save `sales_w2'
	
	* livestock: household mean of the 1/0 recoded hhn1
	use "$endline/Anonymous/Section_N.dta", clear
		recode hhn1 (2=0)
		collapse hhn1 , by(caseid)
		rename hhn1 livestock
	
	tempfile livestock_w2
	save `livestock_w2'	
		
	* Assemble the wave-2 child file: every merge below keeps unmatched records unless stated otherwise
	use `roster_anaemia_w2'
		* dietary diversity
		merge 1:1 memberid using `diet_w2', nogenerate
			
	duplicates drop u_id, force		
		* time use
		merge 1:1 u_id using `hrs_w2', nogenerate
		
		* food programme (records found only in the food programme file are discarded)
		merge 1:1 memberid using `foodpro'	
			drop if _merge == 2
			drop _merge
			
		* household language and ethnicity
		merge m:1 caseid using "$endline/Anonymous/HHL.dta", keepusing(ethnicty lang) nogenerate
			rename ethnicty ethnic   
			
		merge m:1 caseid using "$endline/Anonymous/Section_Z.dta", keepusing(hhz1 hhz1a hhz2 hhz2a) nogenerate
		
			* sex recoded from 1/2 to 1/0 and renamed
			recode hha2 (2=0)
			rename hha2 male
			label var male "Child is male"
			
		* puberty items, then household-level assets, land, sales and livestock
		merge 1:1 u_id using `menarche', nogenerate
		merge m:1 caseid using `assets_w2', nogenerate
		merge m:1 caseid using `land_w2', nogenerate
		merge m:1 caseid using `sales_w2', nogenerate
		merge m:1 caseid using `livestock_w2', nogenerate
			
		recode enrol (2=0)	
		
		* primary school dummy: grade codes 11-16
		gen primary = inrange(grade, 11, 16) if grade != .
		label var primary "Child is enrolled in primary school"
		
		gen age_kind = inlist(age, 4, 5) if age != .
			label var age_kind "Child is aged 4-5 years"
			
		* kindergarten enrolment among 4-5-year-olds: 1 for grade codes 1 or 10, 0 for every other
		* 4-5-year-old (a missing grade included), missing outside that age group
		gen enrol_k = inlist(grade, 1, 10) if age_kind == 1
		label var enrol_k "Child aged 4-5 years is enrolled in kindergarten"	
		
		gen targetage = inrange(age, 4, 15) if age != .
			label var targetage "Child is aged 4-15 years"
			
		* net primary enrolment: 1 for enrolled 6-15-year-olds in primary, 0 for non-enrolled 4-15-year-olds
		gen enrol1 = 1 if enrol == 1 & inrange(age, 6, 15) & primary == 1
			replace enrol1 = 0 if enrol == 0 & inrange(age, 4, 15)
		label var enrol1 "Number of children 4-15y enrolled in primary"
		* this is a standard measure of net primary school enrolment 
		* note: almost all kids of target age are in primary, only 2% are in jss
		
		* absence as a share of the past 7 days, for children enrolled in primary or kindergarten
		gen abs_share = absent/7 if enrol1 == 1 | enrol_k == 1
		label var abs_share "Share of school days in the past 7 days for enrolled 4-15-year-olds"
		
		gen net_att = 1 - abs_share
		label var net_att "Share of school days attended conditional on enrolment 4-15-year-olds"
		
		tostring u_id, replace
		
		* School feeding: keep the reported answer as sf_old and build a cleaned sf that is also
		* switched on when the provider named in hhad3 is one of the government spellings below
			rename sf sf_old
			gen sf = sf_old
			replace sf = 1 if inlist(hhad3, "GHANA GOVERNMENT", "GOVERNMENT", "GOVERNMENT OF GHANA", "GOVT", "GOVT OF GHANA", " government of ghana")
			
		* marks records observed in round 2
		gen pres_r2 = 1
	tempfile w2_almost
	save `w2_almost'	
	
	** April 2019: add labour 
	
	use "$endline/Anonymous/Section_C.dta", clear
	
	* Only the main activity (hhc1a) is used

		gen family_act = inlist(hhc1a, 1, 6, 7, 10)
		label var family_act "Family farm or business/enterprise"
		
		gen others_act = inlist(hhc1a, 2, 4, 5, 8)
		label var others_act " Activity outside the household" 
		
		gen housework = (hhc1a == 3)
		label var housework "Housework" 
		
		gen not_working = (ec == 2)
		label var not_working "Not working" 
		
		rename (hhc2a hhc3a hhc4a) (months_act weeks_act days_act)
		label var months_act "Months spent in main activity"
		label var weeks_act "Weeks per month spent in main activity"
		label var days_act "Days per week spent in main activity"
		
		* weeks worked in the year, capped at 48, expressed as a share of 48
		gen tot_weeks = months_act * weeks_act
		replace tot_weeks = 48 if tot_weeks > 48 & tot_weeks != .
		gen prop_weeks = tot_weeks / 48
		
		* days per week scaled by the share of weeks worked; 0 for those not working
		gen days_weighted = days_act * prop_weeks
			replace days_weighted = 0 if not_working == 1
		label var days_weighted "Days per year spent in activity"
		
		* activity-specific days: the weighted days when the main activity is of that type, 0 otherwise
		gen day_hhwork = cond(housework == 1, days_weighted, 0)
		gen day_other_biz = cond(others_act == 1, days_weighted, 0)
		gen day_fam_biz = cond(family_act == 1, days_weighted, 0)
		* any activity: weighted days for the three activity types, 0 for those not working
		gen day_work_tot = days_weighted if housework == 1 | others_act == 1 | family_act == 1
			replace day_work_tot = 0 if not_working == 1
			
		label var day_hhwork "Days per week doing housework during past 12 months"
		label var day_other_biz "Days per week working outside the household during past 12 months"
		label var day_fam_biz "Days per week working on the family farm /biz during past 12 months"
		label var day_work_tot "Days per week working (any activity) during past 12 months"
		
		keep caseid memberid family_act others_act housework not_working day_hhwork day_other_biz day_fam_biz day_work_tot	
	
	tempfile labour_r2
	save `labour_r2'
	
	* Finish the wave-2 file by adding labour, then stack it under wave 1
	use `w2_almost', clear
		merge 1:1 caseid memberid using `labour_r2', nogenerate
	tempfile w2
	save `w2'
		
	use `wave1', clear
	append using `w2'	
	
			order wave, bef(locid)
			
			* household presence in each round (wave is coded 1/2 at this point)
			bys qid: gen hh_pres1=1 if wave==1
			bys qid: egen hh_pres_r1 =min(hh_pres1)
			bys qid: gen hh_pres2=1 if wave==2
			bys qid: egen hh_pres_r2 =min(hh_pres2)
			* household tracked = observed in both rounds
			bys qid: gen hh_trak=0 
				bys qid: replace hh_trak=1 if hh_pres_r2==1 & hh_pres_r1==1
				
			* with this calculation: 22 hh are lost
						
			* copy the treatment arm from the household's wave-2 records to its wave-1 records
			gen arm2=arm if wave ==2
			bys qid: egen arm3=min(arm2)
			replace arm =arm3 if wave ==1
			drop arm2 arm3
			label define arm 0 "Control" 1 "GSFP" 2 "HGSF"
			label values arm arm
			
			* records still without an arm take the minimum arm observed in their locality
			bys locid: egen treatment= min(arm)
			gen tag=1 if arm ==.
			replace arm=treatment if arm==.
			* Non-interactive check of the imputed records: the Data Editor ("edit") stops an
			* unattended run and allows unrecorded manual changes, so the records are listed instead
			list arm locid wave treatment if tag==1
			drop treatment tag
			
			* GSFP versus HGSF comparison: 0 for arm 1, 1 for arm 2, missing for control
			gen arm3 = 0 if arm == 1
				replace arm3 = 1 if arm == 2
			label define arm2 0 "GSFP" 1 "HGSF"
			label values arm3 arm2
		
			* age at school start; values above 18 are discarded
			rename hhb23 agestart	
			replace agestart = . if agestart > 18
			
			* one dummy per arm (arm11, arm12, ...) and a pooled any-school-feeding indicator
			tab arm, gen(arm1)
			gen arm2 = (arm != 0) if arm != .
			label define primarm 0 "Control" 1 "School feeding"
			label values arm2 primarm
			
			recode urbrur (2=0)
			rename urbrur urban
			label var urban "Urban"
			*** need to get urban for round 2, this is for wave 1 only
			
			**** Rescale each wealth index to the 0-100 range within wave:
			* 100 * (value - wave minimum) / (wave maximum - wave minimum).
			* The helper variables <index>_sca_1 and <index>_sca_2 stay in the data.
			rename wi_all wi
			
			foreach v of varlist wi wi_dur wi_agr {
				forvalues i = 1/2 {
					tempvar lo hi
					egen `hi' = max(`v') if wave == `i'
					egen `lo' = min(`v') if wave == `i'
					gen `v'_sca_`i' = ((`v' - `lo')/(`hi' - `lo'))*100
					drop `hi' `lo'
					replace `v' = `v'_sca_`i' if wave == `i'
				}
			}
			
			* school-age group: 1 for ages 6-11, 0 for children of target age who are 12 or older
			gen sac = 1 if age >= 6 & age <= 11
			replace sac = 0 if age != . & age >= 12 & (targetage == 1 | targetage2 == 1)
			label define sac 0 "Adolescent (12y or more)" 1 "Primary school age (6-11y)"
			label values sac sac
			
			label var haz "Height-for-age z-scores"
			label var baz "BMI-for-age z-scores"
			 
			* grade repetition: 0 if never repeated, 1 if repeated at least once
			 gen rep_grade = 0 if repeat==0
				replace rep_grade=1 if repeat !=0 & repeat !=.
				label var rep_grade "Child has repeated a grade"
				
			* from here on wave is coded 0 (first round) and 1 (second round)
			recode wave (1=0) (2=1)
			
			*** Nutrition dummies
			* A missing z-score must stay missing: in Stata a missing value compares as larger
			* than any number, so an unconditional "haz < -2" would code children without a
			* measurement as not stunted / not thin.
			gen stunted = haz < -2 if !missing(haz)
			gen thin = baz < -2 if !missing(baz)
			gen overweight = 0 if bcat!=.
				replace overweight =1 if bcat >1 & bcat!=.
			label var stunted "Child is stunted"
			label var thin "Child has low bmi for her age"
			*label var overweight "Child is overweight / obese"
			
			*** clean mumgrade headgrade var: I will use the one collected in round 1 and if that was missing the one collected in round 2
			* (wave was recoded just above: 0 = round 1, 1 = round 2)
			
			* spread each round's maternal education and mother id to both records of the child
			gen mumedu0=mum_grade if wave==0
			gen mumedu1=mum_grade if wave ==1
			bys u_id: egen momedu_w0=min(mumedu0)
			bys u_id: egen momedu_w1=min(mumedu1)
			gen mumid0=mumid if wave==0
			gen mumid1=mumid if wave ==1
			bys u_id : egen mumid_w0 = min(mumid0)
			bys u_id : egen mumid_w1 = min(mumid1)
			
			* start from the round-1 value
			gen mumedu=momedu_w0
				* NOTE(review): when the mother id is the same in both rounds (or missing in both) this
				* overwrites the round-1 value with the round-2 value, so round 2 takes precedence whenever
				* it is available - the opposite of the header comment. Confirm which is intended.
				replace mumedu=momedu_w1 if mumid_w0==mumid_w1
			* fall back to the round-1 value if the step above left mumedu missing
			* (the bys qid prefix only sorts the data here; the replace does not use group information)
			bys qid: replace mumedu=momedu_w0 if mumedu==. & momedu_w0!=.
			bys qid: replace mumedu=momedu_w1 if mumedu==. & momedu_w1!=. & mumid_w0==mumid_w1
			
			* household head: round-1 value first, round-2 value only if round 1 is missing
			gen head0 = head_grade if wave ==0
			gen head1 = head_grade if wave ==1
			bys u_id: egen head00=min(head0)
			bys u_id: egen head11=min(head1)
			
			gen headedu = head00 if head00!=.
				replace headedu= head11 if headedu==.

			* one value per household: the minimum across its members
			bys qid: egen headedu_fin= min( headedu)
			
			drop headedu head0 head1 head00 head11 mumedu0 mumedu1 momedu_w0 momedu_w1 mumid0 mumid1 mumid_w0 mumid_w1
			rename headedu_fin headedu
			
			label var mumedu "Mother's education in years"
			label var headedu "Household head education in years"
			label var wi "Wealth index"
			
			*** Progression to secondary for children in grade codes 15 or 16 in the first round (wave 0)
			gen p6 = 1 if inlist(grade, 15, 16) & wave == 0
			bys u_id: egen p6_d = min(p6)
			* among those children: 1 on records with a grade code above 16, 0 on records at or below 16
			gen progr = (grade > 16) if p6_d == 1 & grade != .
			* child-level value: 1 if any of the child's records shows a grade code above 16
			bys u_id: egen progr_sec = max(progr)
			drop p6 p6_d progr
			label var progr_sec "Child was enrolled in P5/P6 at baseline and progressed to Secondary"
			
			* standardise the three aggregate test scores
			foreach s in aggre_lear aggre_cog aggre_edu {
				zscore `s'
			}
		
		*** Consecutive identifiers for districts and localities
		* the first record of each group is flagged and a running sum of the flags numbers the groups
		
		bys district: gen dist_code = (_n == 1)
		replace dist_code = sum(dist_code)
		
		bys locid: gen locid_new = (_n == 1)
		replace locid_new = sum(locid_new)
		
		order dist_code locid_new, bef(male)
				
		* clean amount of money: values above 100 are discarded
		replace hhb49 = . if hhb49 > 100
		
		* northern regions (region codes 8, 9 and 10)
		gen north = inlist(region, 8, 9, 10) if region != .
			label var north "Upper West, upper east, north regions"
	
		***** Flag the communities that already had school feeding at baseline and drop them *****
		*******************************************************************************************
		*** The list comes from the school data rather than the excel roster: the excel roster
		*** left out 5 schools and included one that was not correct.
		*** 24 district/locality pairs are flagged below; sf2 stays in the data (0 for all remaining records).
		
		gen sf2 = 0 
	
	* ASHANTI (5 schools)
		* anofo north asunaye
			replace sf2 = 1 if district == 87 & locid == 614
		* amansie, abuakaa (the school has 2 separate grounds; both were interviewed and count separately in the school survey)
			replace sf2 = 1 if district == 89 & locid == 603
		* asante, yaw bronia
			replace sf2 = 1 if district == 93 & locid == 608
		* atwima: afrancho (618) and adumasa (617)
			replace sf2 = 1 if district == 96 & inlist(locid, 618, 617)
	
	* EASTERN (1 school)
		* atwa, asunafo
			replace sf2 = 1 if district == 67 & locid == 504
	
	* GREATER ACCRA (1 school)
		* lezdoku, nungua
			replace sf2 = 1 if district == 42 & locid == 302
	
	* NORTHERN (6 schools; the last 3 were added from the school roster as compared to the excel roster)
		* east mamp, nanori
			replace sf2 = 1 if district == 139 & locid == 804
		* nanumba, simnoba
			replace sf2 = 1 if district == 143 & locid == 809
		* savologu, pong
			replace sf2 = 1 if district == 146 & locid == 824
		* saboba, natagu (this locid removes both the natab and natagu schools, which both had school feeding at baseline)
			replace sf2 = 1 if district == 145 & locid == 813
		* east gol, wulanyili
			replace sf2 = 1 if district == 138 & locid == 821
		
	* UPPER EASTERN (3 schools; the last 2 were added from the school survey)
		* district 154: Mognori (909) and Binduri (908)
			replace sf2 = 1 if district == 154 & inlist(locid, 909, 908)
		* garu tem, Batkuam 
			replace sf2 = 1 if district == 159 & locid == 913	
			
	* UPPER WESTERN (8 schools)
		* girapa: gberkuo (1002) and degri/duori (1001)
		* NOTE: gberkuo does not appear to have school feeding at baseline in the school survey but it does in the
		* excel roster; since kids at baseline are effectively taking school feeding it is removed as well
			replace sf2 = 1 if district == 163 & inlist(locid, 1002, 1001)
		* lawra: eremon (1003) and boo (1008)
			replace sf2 = 1 if district == 165 & inlist(locid, 1003, 1008)
		* wa east, chaggu
			replace sf2 = 1 if district == 170 & locid == 1005
		* wa municipal: tabiase (1009) and nakori (1004)
			replace sf2 = 1 if district == 169 & inlist(locid, 1009, 1004)
		* wa west, kachiu
			replace sf2 = 1 if district == 171 & locid == 1007
	
	* VOLTA (1 school)
		* nwanta, nabu
			replace sf2 = 1 if district == 58 & locid == 409
		
			
	* drop every flagged community
	
	drop if sf2 == 1

	*4547 observations deleted (BEFORE: BASED ON EXCEL SURVEY: 3899)
			
	***** MISSING ARM VARIABLE in wave 0: there are 2 arms observations for northern, BUNKPURU, BADIMSUG & UE, GARU, BTTKWAN
	* i think they did not go to there at follow up
	* i will drop those 360 observations
	
		* first-round records (wave 0) that still have no treatment arm are removed
		drop if arm==. & wave==0
	
	* THIS WAS THE ONLY WAY IN WHICH I WAS MATCHING TO THE ROSTER IN ORDER TO DROP COMMUNITIES AT BASELINE
	
		* clean age variable: note: "age" is self reported age - "ageyear" is child age in months/12 (more accurate)
		* age_r1 age_r2 are constructed based on ageyear
		* The statements below depend on their order: each one fills what the previous ones left missing.
		
		* fall back on reported age (only when it is 15 or below) where ageyear is missing
		replace ageyear=age if ageyear==. & age <=15
		ren agechild agemo
		* age in months derived from ageyear where it is missing
		replace agemo=ageyear*12 if agemo==.
		
		* age in each round, copied to both records of the child
		gen age1 = ageyear if wave==0
		gen age2=ageyear if wave==1
		bys u_id: egen age_r1=min(age1)
		bys u_id: egen age_r2=min(age2)
		
		* second-round records still without ageyear: use reported age when it is less than 4 years above
		* the first-round age, otherwise first-round age + 3
		* (a missing diffage also satisfies diffage >= 4, because missing compares as larger than any number)
		gen diffage= age-age_r1
		replace ageyear=age if ageyear==. & wave==1 & diffage <4
		replace ageyear= age_r1+3 if ageyear==. & wave==1 & diffage >=4
		drop diffage age1 age2
		* second-round age imputed as first-round age + 2.5 where missing
		replace age_r2=age_r1+2.5 if age_r2==.	
		
		* second-round ages strictly between 5 and 7 are replaced by first-round age + 2.5
		* NOTE(review): when age_r1 is missing this sets age_r2 to missing - confirm that this is intended
		replace age_r2 = age_r1+2.5 if  age_r2 >5 & age_r2<7

		**** Second-round school feeding: fill missing answers from the food programme item (hhad1)
		
		recode hhad1 (2=0)
		compare hhad1 sf if wave == 1
		replace sf = hhad1 if sf == . & wave == 1
		
		* Day counts are capped at 5 school days: days receiving school feeding (hhb42),
		* days having breakfast (brkfast), days coming home to eat lunch (hhb47) and hhb48
		foreach d of varlist hhb42 brkfast hhb47 hhb48 {
			replace `d' = 5 if `d' > 5 & `d' != .
		}
		
		*** School level at the beginning of the intervention (first-round record), copied to all records of the child
		
		tempvar lvl0
		gen `lvl0' = 0 if grade == 10 & wave == 0
			replace `lvl0' = 1 if grade >= 11 & grade <= 16 & wave == 0
		bys u_id: egen grade_var = min(`lvl0')
		drop `lvl0'
			label var grade_var "Level of school in which child was at baseline"
			label define grade_var 0 "Child was in KG" 1 "Child was in P1-P6"
			label values grade_var grade_var
		
		* readable names for the day-count and food-money items
		rename (brkfast foodscl hhb42 hhb47 hhb48 hhb49) (n_break n_food2sc n_sf n_homelunch n_moneysc moneyfood)
		
		* absence capped at 5 days; attendance = 5 minus capped absence for enrolled children
		gen absent2 = cond(absent > 5 & absent != ., 5, absent)
			label var absent2 "Days of absence (truncated to 5)"
		gen days_attend = 5 - absent2 if enrol == 1
		label var days_attend "Days attended conditional on enrolment"
		
		* Highest grade attended: codes 2 and 9 become missing, code 1 becomes 0 and the listed codes
		* are mapped onto a consecutive 1-13 scale; codes not listed keep their original value.
		* The old value label (hhb2) is replaced by one matching the new scale.
		recode grade_att (2=.) (9=.) (1=0) (10=1) (11=2) (12=3) (13=4) (14=5) (15=6) ///
			(16=7) (17=8) (18=9) (19=10) (24=11) (25=12) (26=13)
		label drop hhb2 
		label define grade_att 1 "Kindergarten" 2 "P1" 3 "P2" 4 "P3" 5 "P4"  6 "P5" 7 "P6" 8 "JSS1" ///
		9 "JSS2" 10 "JSS3" 11 "SSS1" 12 "SSS2" 13 "SSS3" 
		label values grade_att grade_att
		
		* Current grade: codes 0, 1 and 2 become missing, the rest follow the same 1-13 scale;
		* code 28 is then assigned to 13
		recode grade (0=.) (1=.) (2=.) (10=1) (11=2) (12=3) (13=4) (14=5) (15=6) ///
			(16=7) (17=8) (18=9) (19=10) (24=11) (25=12) (26=13) 
			replace grade =13 if grade ==28
				
		* Enrolment status in each round, copied to both records of the child
		drop enrol1
		bys u_id: gen enrol1 = enrol if wave == 0
		bys u_id: egen enrol_r1 = min(enrol1)
		bys u_id: gen enrol2 = enrol if wave == 1
		* built from enrol2 in the same way as enrol_r1, so the second-round status is also
		* available on the child's first-round record
		bys u_id: egen enrol_r2 = min(enrol2)
		
		* Drop-out: 0 if enrolled in both rounds; 1 if enrolled in round 1 but not in round 2,
		* or not enrolled in round 1 despite having ever been enrolled (children under 16 only)
		gen drop_out = 0 if enrol_r1 == 1 & enrol_r2 == 1 
		replace drop_out = 1 if enrol_r1 == 1 & enrol_r2 == 0 & ageyear < 16
		replace drop_out = 1 if enrol_r1 == 0 & ever_enr == 1 & ageyear < 16
		*** education is compulsory in ghana between 4 and 15 ****
		label var drop_out "Child has drop out"	
	
	* Poverty indicators from per-capita expenditure (pcexp): thresholds 1314 and 792.05
	gen poor = 0 if pcexp != .
		replace poor = 1 if pcexp <= 1314
	gen expoor = 0 if pcexp != .
		replace expoor = 1 if pcexp <= 792.05
	* household-level values: the minimum across the household's records
	bys qid: egen poor_r1 = min(poor)
	bys qid: egen ext_poor = min(expoor)
	* the label belongs on poor_r1: the record-level poor is dropped on the next line
	label var poor_r1 "Household fall below 2012/13 poverty line"
	label var ext_poor "Household fall below 2012/13 extreme poverty line"
	drop poor expoor
	
	* readable names for the fee items
	ren hhb25 sch_fees
	ren hhb26 pta_fees
	ren hhb27 unif_fees
	label var cost_food "Money spent on food, board and lodging"
	
	* codes 77 and 99 are set to missing
	recode sick (77=.) (99=.)
	
	 * clean menarche variable in order to appear in both waves
	 * age at menarche and menarche status are taken as the minimum over the girl's records (male==0 only)
		 bys u_id: egen age_m=min(age_menarche) if male ==0 
		 bys u_id: egen menar1=min(menarche) if male==0 & menarche!=.
			* first-round status set to 0 when the first-round age is below the age at menarche.
			* age_me is an abbreviation that resolves to age_menarche (the record's own value), not to age_m.
			* NOTE(review): a missing age_menarche compares as larger than any age, and the condition is not
			* restricted to male==0, so first-round records without an age at menarche (boys included) are
			* also set to 0 - confirm that this is intended
			replace menar1 =0 if age_r1 < age_me & wave==0 
			
		* the spread versions replace the original record-level variables
		drop menarche age_menarche
		ren menar1 menarche
		label var menarche "Girl has had menarche"
		
		ren age_m age_menarche
		label var age_menarche "Age at menarche"
	* attach the baseline school characteristics by region, district and locality
	drop _m
	merge m:1 region district locid using "$baseline/School/constructed.dta", nogenerate ///
		keepusing(school_id hhb8 stu_tea learn_health share_class toilet water books toil_ok ///
		books_free enough_class smc_trained learn_env learn_ter learn_ter2)
	
	* Baseline (wave 0) values copied onto every row of the child (u_id).
	* Each section takes the wave-0 value into a helper variable, spreads it
	* with a by-child minimum and drops the helper.

	* Private-school attendance (only values 0 and 1 are carried over).
	gen private_r = 0 if private == 0 & wave == 0
	replace private_r = 1 if private == 1 & wave == 0
	bys u_id: egen private_r1 = min(private_r)
	drop private_r
	label var private_r1 "Child was in private school at baseline"

	* Aggregate cognitive score.
	gen aggre1 = aggre_cog if wave == 0
	bys u_id: egen aggre_cog_r1 = min(aggre1)
	label var aggre_cog_r1 "Child's aggregate cognitive score  at baseline"
	drop aggre1

	* Aggregate learning score. The label previously repeated the text of
	* aggre_cog_r1 ("cognitive"), making the two variables indistinguishable.
	gen aggre1 = aggre_lear if wave == 0
	bys u_id: egen aggre_lear_r1 = min(aggre1)
	label var aggre_lear_r1 "Child's aggregate learning score  at baseline"
	drop aggre1

	* Height-for-age z-score.
	gen haz1 = haz if wave == 0
	bys u_id: egen haz_r1 = min(haz1)
	label var haz_r1 "Child's HAZ  at baseline"
	drop haz1
	
	* Top-coded version of repeat: 0 and 1 are kept, values from 2 up to (but
	* not including) 10 become 2, and everything else is missing.
	gen repeat2 = min(repeat, 2) if inlist(repeat, 0, 1) | (repeat >= 2 & repeat < 10)
	
	* Make u_id x wave unique so the 1:1 merge below is valid.
	* NOTE(review): the force option discards surplus rows even when they
	* differ on other variables - confirm which row should be retained.
	duplicates drop u_id wave, force
	
	* Save the file as it stands before the standardised scores are attached.
	save "$output/prep_for_cogs", replace

	* Attach the contents of cogs_standard by child and wave, then discard
	* the merge indicator.
	merge 1:1 u_id wave  using "$output/cogs_standard"
	drop _m
	
	* For each standardised score, store the child's wave-0 value on every
	* row of that child in a new variable with suffix _r1.
	foreach score of varlist zmaths zlit zraven zdigit {
		tempvar base
		gen `base' = `score' if wave == 0
		bys u_id: egen `score'_r1 = min(`base')
		drop `base'
	}
	
	*** clean enrolment variable

	* Wave 0: a child with a recorded grade is coded as enrolled.
	replace enrol = 1 if wave == 0 & grade != .
	* Wave 1: a missing enrolment is set to 0 when grade is missing but
	* grade_att is recorded.
	replace enrol = 0 if wave == 1 & enrol == . & grade == . & grade_att != .

	* drop grade label which is confusing
	label drop hhb17
	********** DEFINITION OF THE ANALYSIS SAMPLE *****************
	***** GEN VARIABLE RELATED TO EXPOSURE GROUPS ****************

	* Cohort by baseline age (age_r1):
	*   1 = 5 up to (not including) 10,  2 = 10 to 11,  0 = 12 to 15.
	* Ages outside these ranges leave exp_d missing.
	gen exp_d = 0 if inrange(age_r1, 12, 15)
		replace exp_d = 1 if age_r1 >= 5 & age_r1 < 10
		replace exp_d = 2 if inrange(age_r1, 10, 11)

	label var exp_d "Cohort categorisation for children aged 5-15 at baseline"

	* Manual corrections for children whose ages were inconsistent across
	* waves (the wave 2 age is taken as the more reliable one). The ids are
	* grouped by the cohort they are assigned to.
	* NOTE(review): ids 2087006 and 394003 appeared twice in the original
	* list with the same value - check that no other id was intended.
	local to_cohort1 1360002 164003 1748004 1816003 1912003 1926007 2087006 257003 407006 924005 972003
	local to_cohort2 146007
	local to_cohort0 2255003 2453004 2510003 2519002 394003 861007

	foreach cohort in 1 2 0 {
		foreach id of local to_cohort`cohort' {
			replace exp_d = `cohort' if u_id == "`id'"
		}
	}

	** i start by only keeping children of target age at baseline
	* NOTE(review): a corrected child whose age_r1 is still outside 5-15 is
	* dropped by the first line regardless of the correction - confirm.
	drop if age_r1 < 5 | age_r1 > 15
	keep if exp_d != .
	
	**** GENERATE ELIGIBILITY VARIABLE AT BASELINE *********

	* Child-level minimum of each schooling-status flag.
	bys u_id: egen secondary_r1 = min(secondary)
	bys u_id: egen comple_primary_r1 = min(primary_compl)
	bys u_id: egen grade6_r1 = min(grade6)

	* grade1 / grade2 hold grade at wave 0 / wave 1; grade_r1 / grade_r2
	* copy that value onto every row of the child.
	forvalues w = 0/1 {
		local r = `w' + 1
		gen grade`r' = grade if wave == `w'
		bys u_id: egen grade_r`r' = min(grade`r')
	}

	* Children who had just started school in 2013 have a missing grade,
	* because in their first year they had not yet completed any grade.
	* Setting the baseline grade to zero keeps them in the calculation of
	* grade attained. The wave-0 grade itself is filled only when the wave-1
	* grade is 3 or lower. Some missing values remain at round 1 and are
	* deliberately not imputed.
	replace grade_r1 = 0 if grade_r1 == . & age_r1 <= 15
	replace grade = grade_r1 if wave == 0 & grade == . & grade_r2 <= 3
	
	************* CRITERIA FOR ELIGIBILITY:
	* A. BEING OF TARGET AGE AT BASELINE (5-15, AS MEASURED BY THE AGE_R1 VARIABLE, CONSTRUCTED FROM AGE IN MONTHS)
	* B. BEING IN GRADES 1-5 OR IN GRADE 0 (IN OTHER WORDS, STILL IN KINDERGARTEN AT THE START)
	* C. CHILDREN WHO HAD COMPLETED PRIMARY, WERE ALREADY IN SECONDARY, OR WERE IN GRADE 6 ARE DROPPED

	* eli3 = 1 for target-age children, reset to 0 when any exclusion in C holds.
	gen eli3 = 1 if inrange(age_r1, 5, 15)
		replace eli3 = 0 if grade_r1 >= 6 | secondary_r1 == 1 | comple_primary_r1 == 1

	drop primary_compl grade6

	* Remove the ineligible children from both waves.
	drop if eli3 == 0 & (wave == 0 | wave == 1)
	*** 1631 dropped from both rounds (836 in wave 0 and 795 in wave ==1)
	
	*** GENERATE PANEL VARIABLE

	* Presence of the child in each round, copied onto all of the child's rows.
	bys u_id: egen child_r1 = min(pres_r1)
	bys u_id: egen child_r2 = min(pres_r2)

	* 0 = round 1 only, 1 = both rounds, 2 = round 2 only; missing otherwise.
	gen child_panel = cond(child_r1 == 1 & child_r2 == ., 0, ///
		cond(child_r1 == 1 & child_r2 == 1, 1, ///
		cond(child_r1 == . & child_r2 == 1, 2, .)))
	
	* The helper sf_enrol used to be generated here and dropped again three
	* lines later without being used; that dead code has been removed.

	* Wave 1 only: secondary is 1 when grade is above 6 and 0 otherwise;
	* rows with missing grade are left untouched.
	replace secondary = (grade > 6) if wave == 1 & grade != .

	* exp_c = 1 for cohorts 1 and 2 of exp_d, 0 for cohort 0.
	gen exp_c = inlist(exp_d, 1, 2)

	* Number of household members with a non-missing sf at wave 1
	* (defined on wave-1 rows only).
	bys qid: egen n_child_sf = count(sf) if wave == 1
	
	label var zlit "Age-standardised literacy score"
	label var zmaths "Age-standardised Maths score"
	label var zraven "Age-standardised Raven score"
	label var zdigit "Age-standardised Digit span score"

	* Raw composite indices: row means of the component z-scores.
	egen learn_ind = rowmean(zmaths  zlit)
	egen cog_ind = rowmean(zdigit zraven)
	egen all_index = rowmean(zmaths  zlit zdigit zraven)

	**** Following Banerjee et al. (2015, Science): standardised scores are
	* created first, then averaged within families of z-scores, and the
	* averages are standardised again to the control group within each round.

	foreach idx of varlist learn_ind cog_ind all_index {
		forvalues w = 0/1 {
			* Mean and SD of the index among arm2 == 0 in this wave,
			* then copied to every row through an overall maximum.
			tempvar ctrl_mean ctrl_sd ref_mean ref_sd
			egen `ctrl_mean' = mean(`idx') if wave == `w' & arm2 == 0
			egen `ctrl_sd' = sd(`idx') if wave == `w' & arm2 == 0
			egen `ref_mean' = max(`ctrl_mean')
			egen `ref_sd' = max(`ctrl_sd')
			* Wave-specific standardised index; this variable stays in the data.
			gen `idx'_`w' = (`idx' - `ref_mean') / `ref_sd' if wave == `w'
			drop `ctrl_mean' `ctrl_sd' `ref_mean' `ref_sd'
		}
		* Replace the raw index by the wave-appropriate standardised value.
		gen `idx'_new = cond(wave == 0, `idx'_0, cond(wave == 1, `idx'_1, .))
		drop `idx'
		rename `idx'_new `idx'
	}
	
	************
	* Mean and SD of each re-standardised index among arm2 == 0, by wave,
	* stored as variables with suffixes _mean_0, _sd_0, _mean_1 and _sd_1.
	* NOTE(review): the values are spread within u_id, so only children with
	* an arm2 == 0 row in the given wave receive them; all other children
	* are left missing - confirm this is intended.
	foreach idx of varlist learn_ind cog_ind all_index {
		forvalues w = 0/1 {
			tempvar ctrl_mean ctrl_sd
			egen `ctrl_mean' = mean(`idx') if wave == `w' & arm2 == 0
			egen `ctrl_sd' = sd(`idx') if wave == `w' & arm2 == 0
			bys u_id: egen `idx'_mean_`w' = min(`ctrl_mean')
			bys u_id: egen `idx'_sd_`w' = min(`ctrl_sd')
			drop `ctrl_mean' `ctrl_sd'
		}
	}

	label var learn_ind "Composite score: maths and literacy"
	label var cog_ind "Composite score: raven and digit"
	label var all_index  "Composite score: all outcomes"
	
	* Values of mumage above 90 are set to missing.
	replace mumage = . if mumage > 90

	* clean haemoglobin
	* All wave-1 values, and wave-0 values above 99, are divided by 10.
	replace haemo = haemo / 10 if wave == 1 | (wave == 0 & haemo > 99)

	* clean educ exp
	* The codes 999, 9998, 9999 and 99999 are set to missing.
	recode hhb28 hhb29 hhb31 (999 9998 9999 99999 = .)
	
	* gen missing var at baseline

	* Wave-0 copies of the four scores (these variables stay in the data).
	foreach score of varlist zmaths zlit zdigit zraven {
		gen `score'1 = `score' if wave == 0
	}

	* For each score: 1 if the wave-0 value is missing, 0 if it is present,
	* copied onto every row of the child.
	local flag_names missing_math missing_zlit1 missing_zdigit1 missing_zraven1
	local flag_sources zmaths1 zlit1 zdigit1 zraven1
	forvalues k = 1/4 {
		local flag : word `k' of `flag_names'
		local source : word `k' of `flag_sources'
		tempvar is_missing
		gen `is_missing' = (`source' == .) if wave == 0
		bys u_id: egen `flag' = min(`is_missing')
		drop `is_missing'
	}
	 
	* Education expenditure aggregates. rowtotal() treats missing items as 0.
	drop hhb32

	* Total education expenditure. Each item is listed once: unif_fees used to
	* appear twice in this list, which counted uniform fees double.
	egen exp_edu = rowtotal(sch_fees pta_fees unif_fees hhb28 hhb29 cost_food hhb31)
	* School, PTA and uniform fees.
	egen tot_fees = rowtotal(sch_fees pta_fees unif_fees)
	* In-kind item, hhb29 and food/board/lodging.
	* NOTE(review): inkind is part of food_transp but not of exp_edu - confirm.
	egen food_transp = rowtotal(inkind hhb29 cost_food)

	ren hhb28 books_exp
	ren hhb31 extra_tuition

	save "$output/combined_data.dta", replace

	**** only keep data used for study in order to publish the data

	* Each name is listed once; names repeated in the earlier list are removed.
	keep u_id wave region locid north male agemo ///
		arm arm2 exp_c child_panel poor_r1 poor_r ///
		zmaths zlit zraven zdigit learn_ind cog_ind all_index missing_* ///
		enrol primary private grade rep_grade days_attend ///
		exp_edu tot_fees food_transp books_exp extra_tuition cost_food ///
		sch_time stu_time hrs_care hrs_work hrs_lei ///
		sick haemo not_working ///
		day_hhwork day_other_biz day_fam_biz day_work_tot ///
		num_meal n_break nobrk dd9

	save  "$output/dataverse_learning.dta", replace
	
	
	
	
